Ftrace是一个内部跟踪程序,可以用于调试内核、跟踪内核运行状态,有助于排查内核导致的延迟和性能问题。Ftrace通常被认为是一个Linux内置的分析工具,实际上ftrace不仅仅是一个工具,更准确地说是一个综合了多种手段的trace框架,可以用来分析系统延迟、中断的禁用与执行、任务的调度与抢占等等。
不过最常用的还是事件追踪,基于内核event point,可以通过tracefs查看内核的运行情况。
Kernel hacking --->
[*] Tracers --->
[*] Kernel Function Tracer
>>> cat /proc/filesystems | grep tracefs
nodev tracefs //出现tracefs说明内核已开启tracers
>>> mkdir /debug/trace -p
>>> mount -t tracefs nodev /debug/trace
>>> cat /debug/trace/available_tracers //查看当前可用的tracer
function_graph function nop
>>> cat /debug/trace/current_tracer //查看当前tracer
nop
>>> echo function > /debug/trace/current_tracer //将function设置为当前tracer
>>> cat /debug/trace/current_tracer //确认当前tracer已切换为function
function
>>> echo 1 > /debug/trace/tracing_on //开启tracer
>>> cat /debug/trace/trace_pipe
CPU:0 [LOST 117305 EVENTS]
<idle>-0 [000] d..1 1741.362401: __local_bh_enable <-_local_bh_enable
<idle>-0 [000] d.h1 1741.362402: handle_percpu_devid_irq <-generic_handle_irq_desc
...
CPU:1 [LOST 282170 EVENTS]
cat-1742 [001] .... 1816.454528: _raw_spin_lock_irqsave <-uart_write_room
...
...
当中断被屏蔽后,除了NMI和SMI以外,CPU不会响应其它的外部事件,从而造成系统卡顿、调度异常。
内核提供的关闭/打开中断接口如下:
/* hardirq enable/disable api */
local_irq_disable()
local_irq_enable()
local_irq_save()
local_irq_restore()
/* softirq enable/disable api */
local_bh_disable()
local_bh_enable()
示例代码:
/* Number of busy-loop iterations per call; tune for the target CPU. */
enum { IRQOFF_TEST_LOOPS = 100000 };

/*
 * irqoff_test - burn CPU time so that the caller's interrupts-off window
 * is long enough to be recorded by the irqsoff tracer.
 *
 * The previous version started the counter at 1 and looped while it was
 * equal to 0, so the loop body never ran and the function returned at once.
 * The counter is volatile so the compiler cannot remove the delay loop.
 *
 * Return: the final counter value, i.e. IRQOFF_TEST_LOOPS.
 */
int irqoff_test(void)
{
	volatile long long i = 0;

	while (i < IRQOFF_TEST_LOOPS) {
		i++;
	}
	return (int)i;
}
/*
 * Call the delay function ten times with local interrupts disabled, then
 * re-enable them.
 * NOTE(review): i is not declared in this excerpt; it is assumed to be an
 * integer counter initialised to 0 by the enclosing init function - confirm.
 */
local_irq_disable();
while (i < 10) {
	irqoff_test();
	i++;
}
local_irq_enable();
将上述代码放到一个ko的初始化中,用来构造关闭中断的测试环境。
# echo 0 > options/function-trace
# echo irqsoff > current_tracer
# echo 1 > tracing_on
# echo 0 > tracing_max_latency
# cat trace
# cat tracing_max_latency
3932
# cat trace
# tracer: irqsoff
#
# irqsoff latency trace v1.1.5 on 5.15.0
# --------------------------------------------------------------------
# latency: 3932 us, #17/17, CPU#0 | (M:server VP:0, KP:0, SP:0 HP:0 #P:1)
# -----------------
# | task: insmod-113 (uid:0 nice:0 policy:0 rt_prio:0)
# -----------------
# => started at: do_one_initcall
# => ended at: do_one_initcall
#
#
# _------=> CPU#
# / _-----=> irqs-off
# | / _----=> need-resched
# || / _---=> hardirq/softirq
# ||| / _--=> preempt-depth
# |||| / _-=> migrate-disable
# ||||| / delay
# cmd pid |||||| time | caller
# \ / |||||| \ | /
insmod-113 0d.... 1us : uleds_init <-do_one_initcall
insmod-113 0d.... 3us+: start_backtrace <-return_address
insmod-113 0d.... 22us!: irqoff_test <-uleds_init
insmod-113 0d.... 465us!: irqoff_test <-uleds_init
insmod-113 0d.... 850us!: irqoff_test <-uleds_init
insmod-113 0d.... 1234us!: irqoff_test <-uleds_init
insmod-113 0d.... 1617us!: irqoff_test <-uleds_init
insmod-113 0d.... 2001us!: irqoff_test <-uleds_init
insmod-113 0d.... 2385us!: irqoff_test <-uleds_init
insmod-113 0d.... 2769us!: irqoff_test <-uleds_init
insmod-113 0d.... 3152us!: irqoff_test <-uleds_init
insmod-113 0d.... 3536us!: irqoff_test <-uleds_init
insmod-113 0d.... 3928us : start_backtrace <-return_address
insmod-113 0d.... 3930us : start_backtrace <-return_address
insmod-113 0d.... 3932us : uleds_init <-do_one_initcall
insmod-113 0d.... 3937us+: tracer_hardirqs_on <-do_one_initcall
insmod-113 0d.... 3947us : <stack trace>
=> do_one_initcall
=> do_init_module
=> load_module
=> __do_sys_finit_module
=> __arm64_sys_finit_module
=> invoke_syscall
=> el0_svc_common.constprop.0
=> do_el0_svc
=> el0_svc
=> el0t_64_sync_handler
=> el0t_64_sync
可以看到中断被关闭了 3932 us,关闭中断期间执行了 irqoff_test 函数。
当抢占关闭时,虽然可以响应中断,但高优先级的进程在中断处理完成后,不能抢占低优先级进程直到打开抢占,这样会导致抢占延迟。preemptoff 用于跟踪和记录关闭抢占的最大延迟。
开启抢占检测需要将内核抢占模型设置为 “Preemptible Kernel”,位置在general setup->Preemption Model。
内核提供控制抢占的直接接口:
preempt_disable(); // 关闭抢占
preempt_enable(); // 开启抢占
直接控制的接口一般不太常用,但是间接调用的接口使用率会比较高,比如:
spin_lock();
spin_unlock();
测试代码:
/* Spinlock held around the preemption-off test loop; set up with spin_lock_init() before use. */
spinlock_t test_spin_lock;
/* Number of busy-loop iterations per call; tune for the target CPU. */
enum { PREEMPTOFF_TEST_LOOPS = 100000 };

/*
 * preemptoff_test - burn CPU time so that the caller's preemption-off
 * window is long enough to be recorded by the preemptoff tracer.
 *
 * The previous version started the counter at 1 and looped while it was
 * equal to 0, so the loop body never ran and the function returned at once.
 * The counter is volatile so the compiler cannot remove the delay loop.
 *
 * Return: the final counter value, i.e. PREEMPTOFF_TEST_LOOPS.
 */
int preemptoff_test(void)
{
	volatile long long i = 0;

	while (i < PREEMPTOFF_TEST_LOOPS) {
		i++;
	}
	return (int)i;
}
/*
 * Call the delay function ten times while holding the spinlock, then
 * release it. The loop calls preemptoff_test(), the function reported in
 * the preemptoff trace output, rather than irqoff_test().
 * NOTE(review): i is not declared in this excerpt; it is assumed to be an
 * integer counter initialised to 0 by the enclosing init function - confirm.
 */
spin_lock_init(&test_spin_lock);
spin_lock(&test_spin_lock);
while (i < 10) {
	preemptoff_test();
	i++;
}
spin_unlock(&test_spin_lock);
# echo preemptoff > current_tracer
# echo 1 > tracing_on
# echo 0 > tracing_max_latency
# cat trace
# tracer: preemptoff
#
# preemptoff latency trace v1.1.5 on 5.15.0
# --------------------------------------------------------------------
# latency: 4167 us, #129/129, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:1)
# -----------------
# | task: insmod-144 (uid:0 nice:0 policy:0 rt_prio:0)
# -----------------
# => started at: uleds_init
# => ended at: uleds_init
#
#
# _------=> CPU#
# / _-----=> irqs-off
# | / _----=> need-resched
# || / _---=> hardirq/softirq
# ||| / _--=> preempt-depth
# |||| / _-=> migrate-disable
# ||||| / delay
# cmd pid |||||| time | caller
# \ / |||||| \ | /
insmod-144 0...1. 1us+: _raw_spin_lock <-uleds_init
insmod-144 0...1. 22us!: preemptoff_test <-uleds_init
insmod-144 0...1. 459us!: preemptoff_test <-uleds_init
...
insmod-144 0...1. 3770us!: preemptoff_test <-uleds_init
insmod-144 0...1. 4162us : _raw_spin_unlock <-uleds_init
insmod-144 0...1. 4163us : preempt_count_sub <-_raw_spin_unlock
insmod-144 0...1. 4165us : start_backtrace <-return_address
insmod-144 0...1. 4167us : _raw_spin_unlock <-uleds_init
insmod-144 0...1. 4173us+: tracer_preempt_on <-uleds_init
insmod-144 0...1. 4186us : <stack trace>
可以看到,检测到关闭抢占延迟4167 us,其中调用延迟最多的函数为preemptoff_test。
调度延迟检测的是线程被唤醒到真正被调度之间的时间。按照调度器类和调度策略不同,延迟检测分为三种:wakeup(跟踪所有任务中最高优先级任务的唤醒延迟)、wakeup_rt(只跟踪实时任务的唤醒延迟)和wakeup_dl(只跟踪SCHED_DEADLINE任务的唤醒延迟)。
tracefs的接口约有50个,如下所示,包括上一节中提到的available_tracers、current_tracer等。
# ls /debug/trace/
README set_event
available_events set_event_notrace_pid
available_filter_functions set_event_pid
available_tracers set_ftrace_filter
buffer_percent set_ftrace_notrace
buffer_size_kb set_ftrace_notrace_pid
buffer_total_size_kb set_ftrace_pid
current_tracer set_graph_function
dyn_ftrace_total_info set_graph_notrace
dynamic_events timestamp_mode
enabled_functions trace
error_log trace_clock
events trace_marker
free_buffer trace_marker_raw
function_profile_enabled trace_options
instances trace_pipe
max_graph_depth trace_stat
options tracing_cpumask
per_cpu tracing_on
printk_formats tracing_thresh
saved_cmdlines uprobe_events
saved_cmdlines_size uprobe_profile
saved_tgids
下面我们对一些常用接口进行说明和测试。
current_tracer:用于设置或显示当前配置的跟踪程序。更改当前跟踪程序将清除环形缓冲区内容以及“snapshot”缓冲区。
>>> cat current_tracer
function
>>> echo function_graph > current_tracer
>>> cat current_tracer
function_graph