目录
前言
tracepoint简介
添加tracepoint:
使用simpleperf 跟踪自定义的tracepoint
tracepoint 是内核提供的tracing机制,可以通过打开和关闭来probe 相应函数且对内核影响很小,tracepoint目前工作在ftrace框架内,使用ring buffer传输perf数据,针对开发者来说,tracepoint框架提供了现成的宏来帮助开发者定义和增加tracepoints。
我们直接看下系统现成的tracepoint是怎么定义的:
/*
 * Trace event definition for sched_switch.
 * The prototype takes the run queue plus the outgoing (prev) and incoming
 * (next) tasks. Each record stores comm, pid and prio for both tasks and
 * the state of prev. Note that rq is declared but not referenced by the
 * assignments below.
 */
TRACE_EVENT(sched_switch,
/* Prototype of the tracepoint: argument types and names. */
TP_PROTO(struct rq *rq, struct task_struct *prev,
struct task_struct *next),
/* Argument names, matching TP_PROTO. */
TP_ARGS(rq, prev, next),
/* Fields of the record that is stored for each event. */
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
__field( pid_t, prev_pid )
__field( int, prev_prio )
__field( long, prev_state )
__array( char, next_comm, TASK_COMM_LEN )
__field( pid_t, next_pid )
__field( int, next_prio )
),
/* Fill the record (__entry) from the tracepoint arguments. */
TP_fast_assign(
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
__entry->prev_state = prev->state;
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
),
/*
 * Text format of one record. A prev_state of zero is printed as "R";
 * otherwise the set bits are printed as flag letters joined by "|".
 */
TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
__entry->prev_state ?
__print_flags(__entry->prev_state, "|",
{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
{ 16, "Z" }, { 32, "X" }, { 64, "x" },
{ 128, "W" }) : "R",
__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
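TRACE_EVENT 宏总共由6部分组成:
name:tracepoint的名字(上例中为sched_switch),添加trace时调用的函数名即为trace_加上这个名字
TP_PROTO:函数原型,指定参数类型
TP_ARGS:参数名,与TP_PROTO中声明的参数一一对应
TP_STRUCT__entry:需要传输到ring buffer的数据结构体
TP_fast_assign:给TP_STRUCT__entry定义的结构体赋值,括号内是标准c代码,其中__entry是对结构体的引用
TP_printk:将__entry结构体中的数据输出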
一旦像上面定义好tracepoint,就可以在具体的函数中添加trace(注意:下面的调用片段与上例的原型并非来自同一内核版本,因此调用时第一个参数是preempt而不是rq):
/*
 * Fragment of the scheduler's task-switch path: a context switch is only
 * performed when the picked task (next) differs from the current one (prev).
 */
if (likely(prev != next)) {
	/* prev is no longer on the run queue: remember when it went to sleep. */
	if (!prev->on_rq)
		prev->last_sleep_ts = wallclock;

	/*
	 * The final argument was missing in the original text ("wallclock, )"),
	 * which is not valid C. It is restored here as 0.
	 * NOTE(review): confirm the value against the kernel tree in use.
	 */
	update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
	update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);

	rq->nr_switches++;
	rq->curr = next;
	++*switch_count;

	/* A tracepoint named NAME is fired by calling trace_NAME(). */
	trace_sched_switch(preempt, prev, next);

	/* Also unlocks the rq: */
	rq = context_switch(rq, prev, next, &rf);
} else {
	/* Same task keeps running: no switch and no sched_switch event. */
	update_task_ravg(prev, rq, TASK_UPDATE, wallclock, 0);
	rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
	rq_unlock_irq(rq, &rf);
}
如果同类型的tracepoint即参数相同,但是使用场景稍有区分,可以使用 DECLARE_EVENT_CLASS 来统一管理,DECLARE_EVENT_CLASS和TRACE_EVENT拥有一模一样的6个部分,不同的是DECLARE_EVENT_CLASS里声明的name 是class name,当定义具体的event的时候使用DEFINE_EVENT宏,第一个参数先带上这个class name。
比如我们已经按照上面TRACE_EVENT的形式定义了一个名为sched_wakeup_template 的event class,后续的define event可以用如下方式定义:
// sched_wakeup_template is the event class name declared with DECLARE_EVENT_CLASS
// sched_wakeup is the name of this concrete event
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
TP_PROTO(struct rq *rq, struct task_struct *p, int success),
TP_ARGS(rq, p, success));
// sched_wakeup_template is the event class name declared with DECLARE_EVENT_CLASS
// sched_wakeup_new is the name of this concrete event
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
TP_PROTO(struct rq *rq, struct task_struct *p, int success),
TP_ARGS(rq, p, success));
通过上面的方式,先定义个event class作为模板,然后定义不同的event就达成了同类型trace event的快速定义。
有了上面的简介铺垫,现在我们来实际添加一个tracepoint,这里的tracepoint添加在ion子系统中,因为ion定义的perf events没有memory alloc相关的事件,而这个事件在我们追踪ion 内存分配和管理时非常关注,所以以此为例。
首先看下ion alloc的相关方法,总共有三个:
struct dma_buf *ion_alloc(size_t len, unsigned int heap_id_mask,
unsigned int flags)
int ion_alloc_fd(size_t len, unsigned int heap_id_mask, unsigned int flags)
struct dma_buf *ion_alloc_dmabuf(size_t len, unsigned int heap_id_mask,
unsigned int flags)
观察下三个方法的参数类型完全一致,于是我们在其现有的ion的trace header file中添加如下trace event class 和event:
注:ion的trace header file路径在:/sources/toco/kernel/msm-4.14/include/trace/events/ion.h
/*
 * Event class shared by the ION allocation tracepoints.
 *
 * All events of this class take the same three arguments as the ION
 * allocation functions and record them unchanged:
 *   len          - requested allocation size
 *   heap_id_mask - heap id mask supplied by the caller
 *   flags        - allocation flags supplied by the caller
 */
DECLARE_EVENT_CLASS(ion_alloc_mem_class,

	TP_PROTO(size_t len, unsigned int heap_id_mask,
		 unsigned int flags),

	TP_ARGS(len, heap_id_mask, flags),

	TP_STRUCT__entry(
		__field(size_t, len)
		__field(unsigned int, heap_id_mask)
		__field(unsigned int, flags)
	),

	TP_fast_assign(
		__entry->len = len;
		__entry->heap_id_mask = heap_id_mask;
		__entry->flags = flags;
	),

	/*
	 * No trailing "\n" in the format: the trace event macros append the
	 * newline themselves, so an explicit one produces a blank line after
	 * every record in the text trace output.
	 */
	TP_printk("len %zu heap_id_mask %u flags %x",
		  __entry->len,
		  __entry->heap_id_mask,
		  __entry->flags)
);
// Define three concrete trace events on top of ion_alloc_mem_class.
// Each one repeats the class prototype and arguments; only the event name differs.
DEFINE_EVENT(ion_alloc_mem_class, ion_alloc,
TP_PROTO(size_t len, unsigned int heap_id_mask,
unsigned int flags),
TP_ARGS(len, heap_id_mask, flags)
);
DEFINE_EVENT(ion_alloc_mem_class, ion_alloc_fd,
TP_PROTO(size_t len, unsigned int heap_id_mask,
unsigned int flags),
TP_ARGS(len, heap_id_mask, flags)
);
DEFINE_EVENT(ion_alloc_mem_class, ion_alloc_dmabuf,
TP_PROTO(size_t len, unsigned int heap_id_mask,
unsigned int flags),
TP_ARGS(len, heap_id_mask, flags)
);
然后在具体的代码中添加trace,因为ion_alloc和ion_alloc_fd都会最终调用到ion_alloc_dmabuf中,
我们直接在ion_alloc_dmabuf中添加相应的trace:
/*
 * Create an ION buffer from the first usable heap selected by heap_id_mask
 * and export it as a dma-buf.
 *
 * len:          requested size; rounded up with PAGE_ALIGN() before use
 * heap_id_mask: bitmask of heap ids that may be tried
 * flags:        passed through to ion_buffer_create()
 *
 * Returns the result of dma_buf_export() (which may itself be an error
 * pointer), or ERR_PTR(-EINVAL) when the aligned length is zero,
 * ERR_PTR(-ENODEV) when no heap matched the mask, or the error carried by
 * the failed buffer creation.
 */
struct dma_buf *ion_alloc_dmabuf(size_t len, unsigned int heap_id_mask,
unsigned int flags)
{
struct ion_device *dev = internal_dev;
struct ion_buffer *buffer = NULL;
struct ion_heap *heap;
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct dma_buf *dmabuf;
char task_comm[TASK_COMM_LEN];
pr_debug("%s: len %zu heap_id_mask %u flags %x\n", __func__,
len, heap_id_mask, flags);
// Newly added tracepoint. It fires before PAGE_ALIGN() and before any
// validation, so it records the caller's original len, heap_id_mask and flags.
trace_ion_alloc_dmabuf(len, heap_id_mask, flags);
len = PAGE_ALIGN(len);
/* Reject requests whose page-aligned length is zero. */
if (!len)
return ERR_PTR(-EINVAL);
/* Walk the device's heap list while holding dev->lock for reading. */
down_read(&dev->lock);
plist_for_each_entry(heap, &dev->heaps, node) {
/* if the caller didn't specify this heap id */
/* NOTE(review): "1 << heap->id" is a signed shift; confirm heap->id stays below 31. */
if (!((1 << heap->id) & heap_id_mask))
continue;
buffer = ion_buffer_create(heap, dev, len, flags);
/* Stop on the first success, or when creation returned -EINTR. */
if (!IS_ERR(buffer) || PTR_ERR(buffer) == -EINTR)
break;
}
up_read(&dev->lock);
/* buffer is still NULL only if no heap id matched heap_id_mask. */
if (!buffer)
return ERR_PTR(-ENODEV);
/* Creation was attempted but failed: hand the error code back to the caller. */
if (IS_ERR(buffer))
return ERR_CAST(buffer);
get_task_comm(task_comm, current->group_leader);
exp_info.ops = &dma_buf_ops;
exp_info.size = buffer->size;
exp_info.flags = O_RDWR;
exp_info.priv = buffer;
/*
 * Export name is built from module name, heap name, tgid and the group
 * leader's comm.
 * NOTE(review): the kasprintf() result is not checked before it is used.
 */
exp_info.exp_name = kasprintf(GFP_KERNEL, "%s-%s-%d-%s", KBUILD_MODNAME,
heap->name, current->tgid, task_comm);
dmabuf = dma_buf_export(&exp_info);
/* Export failed: destroy the buffer and free the name allocated above. */
if (IS_ERR(dmabuf)) {
_ion_buffer_destroy(buffer);
kfree(exp_info.exp_name);
}
return dmabuf;
}
至此我们完成了自定义添加的tracepoint
添加完自定义的tracepoint也就是perf event后,我们需要用simpleperf来追踪这个事件。
重新编译kernel烧写后,使用simpleperf list | grep ion: 来检查添加的tracepoint是否已经成功:
从上图可以看出我们增加的三个tracepoint已经添加成功
然后使用如下命令record camera provider,并且对ion_alloc_dmabuf进行过滤:
adb shell
simpleperf record -p 764 -e 'ion:ion_alloc_dmabuf' -o /sdcard/perf.data --call-graph dwarf
按ctrl-c结束record,有以下输出时,说明成功记录相应perf 数据:
simpleperf I cmd_record.cpp:635] Samples recorded: 313. Samples lost: 0.
将 perf.data pull出来之后进行simpleperf report:
这里使用simpleperf的report_sample脚本加上 --show_tracing_data参数将输出到ring buffer的数据同样打印出来。
最终得到的trace结果如下:
provider@2.4-se 1781 [007] 237.483212: 1 ion:ion_alloc_dmabuf:
ffffff870930a588 ion_alloc_dmabuf ([kernel.kallsyms])
ffffff870930a586 ion_alloc_dmabuf ([kernel.kallsyms])
ffffff870930aa9a ion_alloc ([kernel.kallsyms])
ffffff87091575aa cam_mem_mgr_alloc_and_map ([kernel.kallsyms])
ffffff8709155156 cam_private_ioctl ([kernel.kallsyms])
ffffff8709098d46 __video_do_ioctl ([kernel.kallsyms])
ffffff8709098806 video_usercopy ([kernel.kallsyms])
ffffff8709098b2a video_ioctl2 ([kernel.kallsyms])
ffffff8709097f7a v4l2_ioctl ([kernel.kallsyms])
ffffff870888387e do_vfs_ioctl ([kernel.kallsyms])
ffffff8708883c56 sys_ioctl ([kernel.kallsyms])
ffffff870868487e el0_svc_naked ([kernel.kallsyms])
7db7ed91c8 __ioctl (/apex/com.android.runtime/lib64/bionic/libc.so)
7db7e937ec ioctl (/apex/com.android.runtime/lib64/bionic/libc.so)
7db327b98c CSLHwInternalDefaultIoctl2(CSLHwDevice const*, unsigned int, void*, unsigned int, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db3270f84 CSLAllocHW(char const*, CSLBufferInfo*, unsigned long, unsigned long, unsigned int, int const*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db3268f60 CSLAlloc (/vendor/lib64/hw/camera.qcom.so)
7db31ffd04 CamX::MemPoolGroup::AllocateBuffers(unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db32026ac CamX::MemPoolGroup::GetBufferFromPool(CamX::MemPoolBufferManager*, CSLBufferInfo*, CamX::NativeHandle const**) (/vendor/lib64/hw/camera.qcom.so)
7db31fbef4 CamX::MemPoolMgr::GetBufferFromPool(void*, CSLBufferInfo*, CamX::NativeHandle const**) (/vendor/lib64/hw/camera.qcom.so)
7db31dd810 CamX::ImageBuffer::Allocate() (/vendor/lib64/hw/camera.qcom.so)
7db31e3808 CamX::ImageBufferManager::BindBufferManagerImageBuffer(CamX::ImageBuffer*) (/vendor/lib64/hw/camera.qcom.so)
7db31dea50 CamX::ImageBuffer::BindBuffer() (/vendor/lib64/hw/camera.qcom.so)
7db321ea10 CamX::Node::BindInputOutputBuffers(CamX::PerRequestActivePorts const*, int, int) (/vendor/lib64/hw/camera.qcom.so)
7db321d760 CamX::Node::ProcessRequest(CamX::NodeProcessRequestData*, unsigned long) (/vendor/lib64/hw/camera.qcom.so)
7db31ce534 CamX::DeferredRequestQueue::DeferredWorkerCore(CamX::Dependency*) (/vendor/lib64/hw/camera.qcom.so)
7db31ce120 CamX::DeferredRequestQueue::DeferredWorkerWrapper(void*) (/vendor/lib64/hw/camera.qcom.so)
7db31497a8 CamX::ThreadCore::DispatchJob(CamX::RuntimeJob*) (/vendor/lib64/hw/camera.qcom.so)
7db314a078 CamX::ThreadCore::ProcessJobQueue() (/vendor/lib64/hw/camera.qcom.so)
7db31499d0 CamX::ThreadCore::DoWork() (/vendor/lib64/hw/camera.qcom.so)
7db314988c CamX::ThreadCore::WorkerThreadBody(void*) (/vendor/lib64/hw/camera.qcom.so)
7db7eee9d0 __pthread_start(void*) (/apex/com.android.runtime/lib64/bionic/libc.so)
7db7e8cb6c __start_thread (/apex/com.android.runtime/lib64/bionic/libc.so)
tracing data:
common_type : 971
common_flags : 0
common_preempt_count : 1
common_pid : 1781
len : 24081792
heap_id_mask : 34603008
flags : 1
HwBinder:764_2 11526 [002] 235.334226: 1 ion:ion_alloc_dmabuf:
ffffff870930a588 ion_alloc_dmabuf ([kernel.kallsyms])
ffffff870930a586 ion_alloc_dmabuf ([kernel.kallsyms])
ffffff870930aa9a ion_alloc ([kernel.kallsyms])
ffffff87091575aa cam_mem_mgr_alloc_and_map ([kernel.kallsyms])
ffffff8709155156 cam_private_ioctl ([kernel.kallsyms])
ffffff8709098d46 __video_do_ioctl ([kernel.kallsyms])
ffffff8709098806 video_usercopy ([kernel.kallsyms])
ffffff8709098b2a video_ioctl2 ([kernel.kallsyms])
ffffff8709097f7a v4l2_ioctl ([kernel.kallsyms])
ffffff870888387e do_vfs_ioctl ([kernel.kallsyms])
ffffff8708883c56 sys_ioctl ([kernel.kallsyms])
ffffff870868487e el0_svc_naked ([kernel.kallsyms])
7db7ed91c8 __ioctl (/apex/com.android.runtime/lib64/bionic/libc.so)
7db7e937ec ioctl (/apex/com.android.runtime/lib64/bionic/libc.so)
7db327b98c CSLHwInternalDefaultIoctl2(CSLHwDevice const*, unsigned int, void*, unsigned int, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db3270f84 CSLAllocHW(char const*, CSLBufferInfo*, unsigned long, unsigned long, unsigned int, int const*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db3268f60 CSLAlloc (/vendor/lib64/hw/camera.qcom.so)
7db31cb85c CamX::CmdBufferManager::InitializePool() (/vendor/lib64/hw/camera.qcom.so)
7db31ca8a4 CamX::CmdBufferManager::Initialize(char const*, CamX::ResourceParams const*) (/vendor/lib64/hw/camera.qcom.so)
7db31ca4a4 CamX::CmdBufferManager::CreateParentManager(CamX::CmdBufferManagerParam*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db31cab20 CamX::CmdBufferManager::CreateMultiManager(CamX::CmdBufferManagerParam*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db3226198 CamX::Node::CreateMultiCmdBufferManager(CamX::CmdBufferManagerParam*, unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db2fb0ae4 CamX::IPENode::CreateFWCommandBufferManagers() (/vendor/lib64/hw/camera.qcom.so)
7db2faf864 CamX::IPENode::InitializeStripingParams(_IpeConfigIoData*) (/vendor/lib64/hw/camera.qcom.so)
7db2fae858 CamX::IPENode::SetupDeviceResource(CSLBufferInfo*, CSLDeviceResource*) (/vendor/lib64/hw/camera.qcom.so)
7db2f9fbfc CamX::IPENode::AcquireDevice() (/vendor/lib64/hw/camera.qcom.so)
7db2f9ecfc CamX::IPENode::PostPipelineCreate() (/vendor/lib64/hw/camera.qcom.so)
7db3229894 CamX::Node::NotifyPipelineCreated() (/vendor/lib64/hw/camera.qcom.so)
7db323cdbc CamX::Pipeline::FinalizePipeline(CamX::FinalizeInitializationData*) (/vendor/lib64/hw/camera.qcom.so)
7db3244908 CamX::Session::FinalizeDeferPipeline(unsigned int) (/vendor/lib64/hw/camera.qcom.so)
7db3248324 CamX::Session::StreamOn(void*) (/vendor/lib64/hw/camera.qcom.so)
7d8f464cb4 FeatureMFNR::ActivateOfflinePipeline(FeatureMFNR::MFNRStage) (/vendor/lib64/hw/com.qti.chi.override.so)
7d8f468e30 FeatureMFNR::ExecuteMfnrRequest(FeatureMFNR::MFNRStage, unsigned int, unsigned int, ChiStreamBuffer*, unsigned int, ChiStreamBuffer*, ChiMetadata*, ChiMetadata*) (/vendor/lib64/hw/com.qti.chi.override.so)
7d8f467b3c FeatureMFNR::SubmitOfflinePostfilterStageRequest(unsigned int, camera3_capture_request*, FeatureMFNR::MFNRInputInfo*) (/vendor/lib64/hw/com.qti.chi.override.so)
7d8f46582c FeatureMFNR::SubmitOfflineMfnrRequest(unsigned int, camera3_capture_request*) (/vendor/lib64/hw/com.qti.chi.override.so)
7d8f46dc3c FeatureMFNR::RequestThreadProcessing() (/vendor/lib64/hw/com.qti.chi.override.so)
7d8f45c924 FeatureMFNR::RequestThread(void*) (/vendor/lib64/hw/com.qti.chi.override.so)
7db7eee9d0 __pthread_start(void*) (/apex/com.android.runtime/lib64/bionic/libc.so)
7db7e8cb6c __start_thread (/apex/com.android.runtime/lib64/bionic/libc.so)
tracing data:
common_type : 971
common_flags : 0
common_preempt_count : 1
common_pid : 11526
len : 3473408
heap_id_mask : 34603008
flags : 0
从用户空间到内核的ion alloc调用一目了然,可以帮助我们了解系统和定位问题。
如果使用simpleperf的report_html脚本: python report_html.py -i perf.data -o iontrace.html得到类似下图的结果:
这样会更加的直观。