Message ID | 20231121094844.5764-4-quic_ekangupt@quicinc.com |
---|---|
State | New |
Headers | show |
Series | None | expand |
On 21/11/2023 09:48, Ekansh Gupta wrote: > Add support to capture kernel performance counters for different > kernel level operations. These counters collects the information > for remote call and copies the information to a buffer shared > by user. > > Collection of DSP performance counters is also added as part of > this change. DSP updates the performance information in the > metadata which is then copied to a buffer passed by the user. > > Signed-off-by: Ekansh Gupta <quic_ekangupt@quicinc.com> > --- > Changes in v2: > - Fixed compile time warnings > Changes in v3: > - Squashed commits to get proper patch series > Changes in v7: > - Rebase the patch to latest kernel version > > drivers/misc/fastrpc.c | 141 ++++++++++++++++++++++++++++++++++-- > include/uapi/misc/fastrpc.h | 14 ++++ > 2 files changed, 147 insertions(+), 8 deletions(-) > > diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.cin:sent > index 55f126c779cb..cbcac0b3d09b 100644 > --- a/drivers/misc/fastrpc.c > +++ b/drivers/misc/fastrpc.c > @@ -19,6 +19,7 @@ > #include <linux/rpmsg.h> > #include <linux/scatterlist.h> > #include <linux/slab.h> > +#include <linux/delay.h> > #include <linux/firmware/qcom/qcom_scm.h> > #include <uapi/misc/fastrpc.h> > #include <linux/of_reserved_mem.h> > @@ -33,6 +34,8 @@ > #define FASTRPC_ALIGN 128 > #define FASTRPC_MAX_FDLIST 16 > #define FASTRPC_MAX_CRCLIST 64 > +#define FASTRPC_KERNEL_PERF_LIST (PERF_KEY_MAX) > +#define FASTRPC_DSP_PERF_LIST 12 > #define FASTRPC_PHYS(p) ((p) & 0xffffffff) > #define FASTRPC_CTX_MAX (256) > #define FASTRPC_INIT_HANDLE 1 > @@ -105,6 +108,27 @@ > > #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev) > > +#define PERF_END ((void)0) > + > +#define PERF(enb, cnt, ff) \ > + {\ > + struct timespec64 startT = {0};\ > + uint64_t *counter = cnt;\ > + if (enb && counter) {\ > + ktime_get_real_ts64(&startT);\ > + } \ > + ff ;\ > + if (enb && counter) {\ > + *counter += getnstimediff(&startT);\ > + } \ > + } > + > 
+#define GET_COUNTER(perf_ptr, offset) \ > + (perf_ptr != NULL ?\ > + (((offset >= 0) && (offset < PERF_KEY_MAX)) ?\ > + (uint64_t *)(perf_ptr + offset)\ > + : (uint64_t *)NULL) : (uint64_t *)NULL) > + > static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp", > "sdsp", "cdsp"}; > struct fastrpc_phy_page { > @@ -228,6 +252,19 @@ struct fastrpc_map { > struct kref refcount; > }; > > +struct fastrpc_perf { > + u64 count; > + u64 flush; > + u64 map; > + u64 copy; > + u64 link; > + u64 getargs; > + u64 putargs; > + u64 invargs; > + u64 invoke; > + u64 tid; > +}; > + > struct fastrpc_invoke_ctx { > int nscalars; > int nbufs; > @@ -236,6 +273,8 @@ struct fastrpc_invoke_ctx { > int tgid; > u32 sc; > u32 *crc; > + u64 *perf_kernel; > + u64 *perf_dsp; > u64 ctxid; > u64 msg_sz; > struct kref refcount; > @@ -250,6 +289,7 @@ struct fastrpc_invoke_ctx { > struct fastrpc_invoke_args *args; > struct fastrpc_buf_overlap *olaps; > struct fastrpc_channel_ctx *cctx; > + struct fastrpc_perf *perf; > }; > > struct fastrpc_session_ctx { > @@ -299,6 +339,7 @@ struct fastrpc_user { > struct fastrpc_session_ctx *sctx; > struct fastrpc_buf *init_mem; > > + u32 profile; > int tgid; > int pd; > bool is_secure_dev; > @@ -308,6 +349,17 @@ struct fastrpc_user { > struct mutex mutex; > }; > > +static inline int64_t getnstimediff(struct timespec64 *start) > +{ > + int64_t ns; > + struct timespec64 ts, b; > + > + ktime_get_real_ts64(&ts); > + b = timespec64_sub(ts, *start); > + ns = timespec64_to_ns(&b); > + return ns; > +} > + > static void fastrpc_free_map(struct kref *ref) > { > struct fastrpc_map *map; > @@ -493,6 +545,9 @@ static void fastrpc_context_free(struct kref *ref) > if (ctx->buf) > fastrpc_buf_free(ctx->buf); > > + if (ctx->fl->profile) > + kfree(ctx->perf); > + > spin_lock_irqsave(&cctx->lock, flags); > idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4); > spin_unlock_irqrestore(&cctx->lock, flags); > @@ -612,6 +667,14 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc( > 
fastrpc_channel_ctx_get(cctx); > > ctx->crc = (u32 *)(uintptr_t)invoke->crc; > + ctx->perf_dsp = (u64 *)(uintptr_t)invoke->perf_dsp; > + ctx->perf_kernel = (u64 *)(uintptr_t)invoke->perf_kernel; > + if (ctx->fl->profile) { > + ctx->perf = kzalloc(sizeof(*(ctx->perf)), GFP_KERNEL); > + if (!ctx->perf) > + return ERR_PTR(-ENOMEM); > + ctx->perf->tid = ctx->fl->tgid; > + } > ctx->sc = sc; > ctx->retval = -1; > ctx->pid = current->pid; > @@ -875,7 +938,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) > sizeof(struct fastrpc_invoke_buf) + > sizeof(struct fastrpc_phy_page)) * ctx->nscalars + > sizeof(u64) * FASTRPC_MAX_FDLIST + > - sizeof(u32) * FASTRPC_MAX_CRCLIST; > + sizeof(u32) * FASTRPC_MAX_CRCLIST + > + sizeof(u32) + sizeof(u64) * FASTRPC_DSP_PERF_LIST; > > return size; > } > @@ -942,16 +1006,22 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) > int inbufs, i, oix, err = 0; > u64 len, rlen, pkt_size; > u64 pg_start, pg_end; > + u64 *perf_counter = NULL; > uintptr_t args; > int metalen; > > + if (ctx->fl->profile) > + perf_counter = (u64 *)ctx->perf + PERF_COUNT; > + > inbufs = REMOTE_SCALARS_INBUFS(ctx->sc); > metalen = fastrpc_get_meta_size(ctx); > pkt_size = fastrpc_get_payload_size(ctx, metalen); > > + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_MAP), > err = fastrpc_create_maps(ctx); > if (err) > return err; > + PERF_END); > > ctx->msg_sz = pkt_size; > > @@ -984,6 +1054,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) > if (ctx->maps[i]) { > struct vm_area_struct *vma = NULL; > > + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_MAP), > rpra[i].buf.pv = (u64) ctx->args[i].ptr; > pages[i].addr = ctx->maps[i]->phys; > > @@ -998,9 +1069,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) > pg_end = ((ctx->args[i].ptr + len - 1) & PAGE_MASK) >> > PAGE_SHIFT; > pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE; > - > + PERF_END); > } else { > - > + 
PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_COPY), > if (ctx->olaps[oix].offset == 0) { > rlen -= ALIGN(args, FASTRPC_ALIGN) - args; > args = ALIGN(args, FASTRPC_ALIGN); > @@ -1022,12 +1093,14 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) > pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE; > args = args + mlen; > rlen -= mlen; > + PERF_END); > } > > if (i < inbufs && !ctx->maps[i]) { > void *dst = (void *)(uintptr_t)rpra[i].buf.pv; > void *src = (void *)(uintptr_t)ctx->args[i].ptr; > > + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_COPY), > if (!kernel) { > if (copy_from_user(dst, (void __user *)src, > len)) { > @@ -1037,6 +1110,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) > } else { > memcpy(dst, src, len); > } > + PERF_END); > } > } > > @@ -1067,9 +1141,9 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, > struct fastrpc_map *mmap = NULL; > struct fastrpc_invoke_buf *list; > struct fastrpc_phy_page *pages; > - u64 *fdlist; > - u32 *crclist; > - int i, inbufs, outbufs, handles; > + u64 *fdlist, *perf_dsp_list; > + u32 *crclist, *poll; > + int i, inbufs, outbufs, handles, perferr; > > inbufs = REMOTE_SCALARS_INBUFS(ctx->sc); > outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc); > @@ -1078,6 +1152,8 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, > pages = fastrpc_phy_page_start(list, ctx->nscalars); > fdlist = (u64 *)(pages + inbufs + outbufs + handles); > crclist = (u32 *)(fdlist + FASTRPC_MAX_FDLIST); > + poll = (u32 *)(crclist + FASTRPC_MAX_CRCLIST); > + perf_dsp_list = (u64 *)(poll + 1); > > for (i = inbufs; i < ctx->nbufs; ++i) { > if (!ctx->maps[i]) { > @@ -1103,8 +1179,16 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, > } > > if (ctx->crc && crclist && rpra) { > - if (copy_to_user((void __user *)ctx->crc, crclist, FASTRPC_MAX_CRCLIST * sizeof(u32))) > + if (copy_to_user((void __user *)ctx->crc, crclist, > + FASTRPC_MAX_CRCLIST * sizeof(u32))) 
{ > return -EFAULT; > + } > + } > + if (ctx->perf_dsp && perf_dsp_list) { > + perferr = copy_to_user((void __user *)ctx->perf_dsp, > + perf_dsp_list, FASTRPC_DSP_PERF_LIST * sizeof(u64)); > + if (perferr) > + dev_info(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr); > } > return 0; > } > @@ -1141,6 +1225,21 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx, > > } > > +static void fastrpc_update_invoke_count(u32 handle, u64 *perf_counter, > + struct timespec64 *invoket) > +{ > + u64 *invcount, *count; > + > + invcount = GET_COUNTER(perf_counter, PERF_INVOKE); > + if (invcount) > + *invcount += getnstimediff(invoket); > + > + count = GET_COUNTER(perf_counter, PERF_COUNT); > + if (count) > + *count += 1; > +} > + > + > static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, > struct fastrpc_enhanced_invoke *invoke) > { > @@ -1148,7 +1247,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, > struct fastrpc_buf *buf, *b; > struct fastrpc_invoke *inv = &invoke->inv; > u32 handle, sc; > - int err = 0; > + u64 *perf_counter = NULL; > + int err = 0, perferr = 0; > + struct timespec64 invoket = {0}; > + > + if (fl->profile) > + ktime_get_real_ts64(&invoket); > > if (!fl->sctx) > return -EINVAL; > @@ -1167,16 +1271,22 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, > if (IS_ERR(ctx)) > return PTR_ERR(ctx); > > + if (fl->profile) > + perf_counter = (u64 *)ctx->perf + PERF_COUNT; > + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_GETARGS), > err = fastrpc_get_args(kernel, ctx); > if (err) > goto bail; > + PERF_END); > > /* make sure that all CPU memory writes are seen by DSP */ > dma_wmb(); > + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_LINK), > /* Send invoke buffer to remote dsp */ > err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle); > if (err) > goto bail; > + PERF_END); > > if (kernel) { > if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) > @@ -1190,10 
+1300,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, > > /* make sure that all memory writes by DSP are seen by CPU */ > dma_rmb(); > + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_PUTARGS), > /* populate all the output buffers with results */ > err = fastrpc_put_args(ctx, kernel); > if (err) > goto bail; > + PERF_END); > > /* Check the response from remote dsp */ > err = ctx->retval; > @@ -1214,6 +1326,15 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, > list_del(&buf->node); > list_add_tail(&buf->node, &fl->cctx->invoke_interrupted_mmaps); > } > + } else if (ctx) { > + if (fl->profile && !err) > + fastrpc_update_invoke_count(handle, perf_counter, &invoket); > + if (fl->profile && ctx->perf && ctx->perf_kernel) { > + perferr = copy_to_user((void __user *)ctx->perf_kernel, > + ctx->perf, FASTRPC_KERNEL_PERF_LIST * sizeof(u64)); > + if (perferr) > + dev_info(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr); > + } > } > > if (err) > @@ -1712,6 +1833,7 @@ static int fastrpc_multimode_invoke(struct fastrpc_user *fl, char __user *argp) > struct fastrpc_invoke_args *args = NULL; > struct fastrpc_ioctl_multimode_invoke invoke; > u32 nscalars; > + u64 *perf_kernel; > int err, i; > > if (copy_from_user(&invoke, argp, sizeof(invoke))) > @@ -1746,6 +1868,9 @@ static int fastrpc_multimode_invoke(struct fastrpc_user *fl, char __user *argp) > return -EFAULT; > } > } > + perf_kernel = (u64 *)(uintptr_t)einv.perf_kernel; > + if (perf_kernel) > + fl->profile = true; > einv.inv.args = (__u64)args; > err = fastrpc_internal_invoke(fl, false, &einv); > kfree(args); > diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h > index 45c15be1de58..074675ee646f 100644 > --- a/include/uapi/misc/fastrpc.h > +++ b/include/uapi/misc/fastrpc.h > @@ -166,4 +166,18 @@ struct fastrpc_ioctl_capability { > __u32 reserved[4]; > }; > > +enum fastrpc_perfkeys { > + PERF_COUNT = 0, > + PERF_RESERVED1 = 1, why 
reserved in the middle of the range? If you already know what it is for, please add the proper name for it. > + PERF_MAP = 2, > + PERF_COPY = 3, > + PERF_LINK = 4, > + PERF_GETARGS = 5, > + PERF_PUTARGS = 6, > + PERF_RESERVED2 = 7, > + PERF_INVOKE = 8, > + PERF_RESERVED3 = 9, > + PERF_KEY_MAX = 10, > +}; > + > #endif /* __QCOM_FASTRPC_H__ */
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 55f126c779cb..cbcac0b3d09b 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -19,6 +19,7 @@ #include <linux/rpmsg.h> #include <linux/scatterlist.h> #include <linux/slab.h> +#include <linux/delay.h> #include <linux/firmware/qcom/qcom_scm.h> #include <uapi/misc/fastrpc.h> #include <linux/of_reserved_mem.h> @@ -33,6 +34,8 @@ #define FASTRPC_ALIGN 128 #define FASTRPC_MAX_FDLIST 16 #define FASTRPC_MAX_CRCLIST 64 +#define FASTRPC_KERNEL_PERF_LIST (PERF_KEY_MAX) +#define FASTRPC_DSP_PERF_LIST 12 #define FASTRPC_PHYS(p) ((p) & 0xffffffff) #define FASTRPC_CTX_MAX (256) #define FASTRPC_INIT_HANDLE 1 @@ -105,6 +108,27 @@ #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev) +#define PERF_END ((void)0) + +#define PERF(enb, cnt, ff) \ + {\ + struct timespec64 startT = {0};\ + uint64_t *counter = cnt;\ + if (enb && counter) {\ + ktime_get_real_ts64(&startT);\ + } \ + ff ;\ + if (enb && counter) {\ + *counter += getnstimediff(&startT);\ + } \ + } + +#define GET_COUNTER(perf_ptr, offset) \ + (perf_ptr != NULL ?\ + (((offset >= 0) && (offset < PERF_KEY_MAX)) ?\ + (uint64_t *)(perf_ptr + offset)\ + : (uint64_t *)NULL) : (uint64_t *)NULL) + static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp", "sdsp", "cdsp"}; struct fastrpc_phy_page { @@ -228,6 +252,19 @@ struct fastrpc_map { struct kref refcount; }; +struct fastrpc_perf { + u64 count; + u64 flush; + u64 map; + u64 copy; + u64 link; + u64 getargs; + u64 putargs; + u64 invargs; + u64 invoke; + u64 tid; +}; + struct fastrpc_invoke_ctx { int nscalars; int nbufs; @@ -236,6 +273,8 @@ struct fastrpc_invoke_ctx { int tgid; u32 sc; u32 *crc; + u64 *perf_kernel; + u64 *perf_dsp; u64 ctxid; u64 msg_sz; struct kref refcount; @@ -250,6 +289,7 @@ struct fastrpc_invoke_ctx { struct fastrpc_invoke_args *args; struct fastrpc_buf_overlap *olaps; struct fastrpc_channel_ctx *cctx; + struct fastrpc_perf *perf; }; struct 
fastrpc_session_ctx { @@ -299,6 +339,7 @@ struct fastrpc_user { struct fastrpc_session_ctx *sctx; struct fastrpc_buf *init_mem; + u32 profile; int tgid; int pd; bool is_secure_dev; @@ -308,6 +349,17 @@ struct fastrpc_user { struct mutex mutex; }; +static inline int64_t getnstimediff(struct timespec64 *start) +{ + int64_t ns; + struct timespec64 ts, b; + + ktime_get_real_ts64(&ts); + b = timespec64_sub(ts, *start); + ns = timespec64_to_ns(&b); + return ns; +} + static void fastrpc_free_map(struct kref *ref) { struct fastrpc_map *map; @@ -493,6 +545,9 @@ static void fastrpc_context_free(struct kref *ref) if (ctx->buf) fastrpc_buf_free(ctx->buf); + if (ctx->fl->profile) + kfree(ctx->perf); + spin_lock_irqsave(&cctx->lock, flags); idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4); spin_unlock_irqrestore(&cctx->lock, flags); @@ -612,6 +667,14 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc( fastrpc_channel_ctx_get(cctx); ctx->crc = (u32 *)(uintptr_t)invoke->crc; + ctx->perf_dsp = (u64 *)(uintptr_t)invoke->perf_dsp; + ctx->perf_kernel = (u64 *)(uintptr_t)invoke->perf_kernel; + if (ctx->fl->profile) { + ctx->perf = kzalloc(sizeof(*(ctx->perf)), GFP_KERNEL); + if (!ctx->perf) + return ERR_PTR(-ENOMEM); + ctx->perf->tid = ctx->fl->tgid; + } ctx->sc = sc; ctx->retval = -1; ctx->pid = current->pid; @@ -875,7 +938,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) sizeof(struct fastrpc_invoke_buf) + sizeof(struct fastrpc_phy_page)) * ctx->nscalars + sizeof(u64) * FASTRPC_MAX_FDLIST + - sizeof(u32) * FASTRPC_MAX_CRCLIST; + sizeof(u32) * FASTRPC_MAX_CRCLIST + + sizeof(u32) + sizeof(u64) * FASTRPC_DSP_PERF_LIST; return size; } @@ -942,16 +1006,22 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) int inbufs, i, oix, err = 0; u64 len, rlen, pkt_size; u64 pg_start, pg_end; + u64 *perf_counter = NULL; uintptr_t args; int metalen; + if (ctx->fl->profile) + perf_counter = (u64 *)ctx->perf + PERF_COUNT; + inbufs = 
REMOTE_SCALARS_INBUFS(ctx->sc); metalen = fastrpc_get_meta_size(ctx); pkt_size = fastrpc_get_payload_size(ctx, metalen); + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_MAP), err = fastrpc_create_maps(ctx); if (err) return err; + PERF_END); ctx->msg_sz = pkt_size; @@ -984,6 +1054,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) if (ctx->maps[i]) { struct vm_area_struct *vma = NULL; + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_MAP), rpra[i].buf.pv = (u64) ctx->args[i].ptr; pages[i].addr = ctx->maps[i]->phys; @@ -998,9 +1069,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) pg_end = ((ctx->args[i].ptr + len - 1) & PAGE_MASK) >> PAGE_SHIFT; pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE; - + PERF_END); } else { - + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_COPY), if (ctx->olaps[oix].offset == 0) { rlen -= ALIGN(args, FASTRPC_ALIGN) - args; args = ALIGN(args, FASTRPC_ALIGN); @@ -1022,12 +1093,14 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE; args = args + mlen; rlen -= mlen; + PERF_END); } if (i < inbufs && !ctx->maps[i]) { void *dst = (void *)(uintptr_t)rpra[i].buf.pv; void *src = (void *)(uintptr_t)ctx->args[i].ptr; + PERF(ctx->fl->profile, GET_COUNTER(perf_counter, PERF_COPY), if (!kernel) { if (copy_from_user(dst, (void __user *)src, len)) { @@ -1037,6 +1110,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) } else { memcpy(dst, src, len); } + PERF_END); } } @@ -1067,9 +1141,9 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, struct fastrpc_map *mmap = NULL; struct fastrpc_invoke_buf *list; struct fastrpc_phy_page *pages; - u64 *fdlist; - u32 *crclist; - int i, inbufs, outbufs, handles; + u64 *fdlist, *perf_dsp_list; + u32 *crclist, *poll; + int i, inbufs, outbufs, handles, perferr; inbufs = REMOTE_SCALARS_INBUFS(ctx->sc); outbufs = 
REMOTE_SCALARS_OUTBUFS(ctx->sc); @@ -1078,6 +1152,8 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, pages = fastrpc_phy_page_start(list, ctx->nscalars); fdlist = (u64 *)(pages + inbufs + outbufs + handles); crclist = (u32 *)(fdlist + FASTRPC_MAX_FDLIST); + poll = (u32 *)(crclist + FASTRPC_MAX_CRCLIST); + perf_dsp_list = (u64 *)(poll + 1); for (i = inbufs; i < ctx->nbufs; ++i) { if (!ctx->maps[i]) { @@ -1103,8 +1179,16 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, } if (ctx->crc && crclist && rpra) { - if (copy_to_user((void __user *)ctx->crc, crclist, FASTRPC_MAX_CRCLIST * sizeof(u32))) + if (copy_to_user((void __user *)ctx->crc, crclist, + FASTRPC_MAX_CRCLIST * sizeof(u32))) { return -EFAULT; + } + } + if (ctx->perf_dsp && perf_dsp_list) { + perferr = copy_to_user((void __user *)ctx->perf_dsp, + perf_dsp_list, FASTRPC_DSP_PERF_LIST * sizeof(u64)); + if (perferr) + dev_info(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr); } return 0; } @@ -1141,6 +1225,21 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx, } +static void fastrpc_update_invoke_count(u32 handle, u64 *perf_counter, + struct timespec64 *invoket) +{ + u64 *invcount, *count; + + invcount = GET_COUNTER(perf_counter, PERF_INVOKE); + if (invcount) + *invcount += getnstimediff(invoket); + + count = GET_COUNTER(perf_counter, PERF_COUNT); + if (count) + *count += 1; +} + + static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct fastrpc_enhanced_invoke *invoke) { @@ -1148,7 +1247,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct fastrpc_buf *buf, *b; struct fastrpc_invoke *inv = &invoke->inv; u32 handle, sc; - int err = 0; + u64 *perf_counter = NULL; + int err = 0, perferr = 0; + struct timespec64 invoket = {0}; + + if (fl->profile) + ktime_get_real_ts64(&invoket); if (!fl->sctx) return -EINVAL; @@ -1167,16 +1271,22 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, 
if (IS_ERR(ctx)) return PTR_ERR(ctx); + if (fl->profile) + perf_counter = (u64 *)ctx->perf + PERF_COUNT; + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_GETARGS), err = fastrpc_get_args(kernel, ctx); if (err) goto bail; + PERF_END); /* make sure that all CPU memory writes are seen by DSP */ dma_wmb(); + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_LINK), /* Send invoke buffer to remote dsp */ err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle); if (err) goto bail; + PERF_END); if (kernel) { if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) @@ -1190,10 +1300,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, /* make sure that all memory writes by DSP are seen by CPU */ dma_rmb(); + PERF(fl->profile, GET_COUNTER(perf_counter, PERF_PUTARGS), /* populate all the output buffers with results */ err = fastrpc_put_args(ctx, kernel); if (err) goto bail; + PERF_END); /* Check the response from remote dsp */ err = ctx->retval; @@ -1214,6 +1326,15 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, list_del(&buf->node); list_add_tail(&buf->node, &fl->cctx->invoke_interrupted_mmaps); } + } else if (ctx) { + if (fl->profile && !err) + fastrpc_update_invoke_count(handle, perf_counter, &invoket); + if (fl->profile && ctx->perf && ctx->perf_kernel) { + perferr = copy_to_user((void __user *)ctx->perf_kernel, + ctx->perf, FASTRPC_KERNEL_PERF_LIST * sizeof(u64)); + if (perferr) + dev_info(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr); + } } if (err) @@ -1712,6 +1833,7 @@ static int fastrpc_multimode_invoke(struct fastrpc_user *fl, char __user *argp) struct fastrpc_invoke_args *args = NULL; struct fastrpc_ioctl_multimode_invoke invoke; u32 nscalars; + u64 *perf_kernel; int err, i; if (copy_from_user(&invoke, argp, sizeof(invoke))) @@ -1746,6 +1868,9 @@ static int fastrpc_multimode_invoke(struct fastrpc_user *fl, char __user *argp) return -EFAULT; } } + perf_kernel = (u64 
*)(uintptr_t)einv.perf_kernel; + if (perf_kernel) + fl->profile = true; einv.inv.args = (__u64)args; err = fastrpc_internal_invoke(fl, false, &einv); kfree(args); diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 45c15be1de58..074675ee646f 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -166,4 +166,18 @@ struct fastrpc_ioctl_capability { __u32 reserved[4]; }; +enum fastrpc_perfkeys { + PERF_COUNT = 0, + PERF_RESERVED1 = 1, + PERF_MAP = 2, + PERF_COPY = 3, + PERF_LINK = 4, + PERF_GETARGS = 5, + PERF_PUTARGS = 6, + PERF_RESERVED2 = 7, + PERF_INVOKE = 8, + PERF_RESERVED3 = 9, + PERF_KEY_MAX = 10, +}; + #endif /* __QCOM_FASTRPC_H__ */
Add support to capture kernel performance counters for different kernel level operations. These counters collect information for a remote call, and that information is copied to a buffer shared by the user. Collection of DSP performance counters is also added as part of this change. The DSP updates the performance information in the metadata, which is then copied to a buffer passed by the user. Signed-off-by: Ekansh Gupta <quic_ekangupt@quicinc.com> --- Changes in v2: - Fixed compile time warnings Changes in v3: - Squashed commits to get proper patch series Changes in v7: - Rebase the patch to latest kernel version drivers/misc/fastrpc.c | 141 ++++++++++++++++++++++++++++++++++-- include/uapi/misc/fastrpc.h | 14 ++++ 2 files changed, 147 insertions(+), 8 deletions(-)