跟踪内核丢包排查
问题背景
unaForwardKernel 转发udp 丢包
结论
192.168.1.2 > 192.168.1.25 > 192.168.1.201
25 上的unaForwardKernel在Pre-routing 将源地址改为了25,过不了反向路由查找:
25 > 201 OK
201> 25 !OK
排查总结
充分利用更多的信息(如源 IP 和目的 IP)。sysdig 和 dropwatch 只能找到一堆最后的调用点,无法跟踪整个调用栈,可以试试 systemtap、perf、ebpf。
排查过程
原先没有ip信息,只说明了udp丢包。https://www.cnblogs.com/leonxyzh/p/8288339.html
使用systemtap 脚本 进行初步定位kfree_skb 丢包的调用点
#! /usr/bin/env stap
# Dropped-packet monitor: counts kfree_skb tracepoint hits per call site and
# prints a backtrace for every hit.

# Statistics aggregate keyed by the tracepoint's $location value.
# It is never cleared, so the per-second report shows cumulative counts.
global drop_sites

probe begin {
    printf("Monitoring for dropped packets\n")
}

probe end {
    printf("Stopping dropped packet monitor\n")
}

# Record one sample for the code address that freed the skb.
probe kernel.trace("kfree_skb") {
    drop_sites[$location] <<< 1
}

# Once per second, list every call site seen so far, highest count first.
probe timer.sec(1) {
    foreach (site in drop_sites-) {
        printf("%d packets dropped at %s\n", @count(drop_sites[site]),symname(site))
    }
    printf("*************\n")
}

# Dump a backtrace for each individual kfree_skb hit.
probe kernel.trace("kfree_skb") {
    printf("##########\n")
    print_backtrace()
    printf("#########\n")
}
282 packets dropped at tpacket_rcv
91 packets dropped at ip_rcv_finish
70 packets dropped at tcp_v4_rcv
26 packets dropped at unix_stream_connect
12 packets dropped at unix_dgram_sendmsg
3 packets dropped at sk_stream_kill_queues
3 packets dropped at tcp_rcv_state_process
1 packets dropped at ip6_mc_input
1 packets dropped at inet_frag_destroy
结果有好几个函数,到底是哪个函数无法确定,对比实验前和实验后 没有明显差别。除非加快实验丢包的频率。
搜索调用栈中的udp关键字, 但是从栈逻辑看 是已经发送完了http://blog.csdn.net/wdscq1234/article/details/51986189
0xffffffffc1f3ca33 [stap_01ef9362dd0de8ebe12b09cfab0a1f5_23123+0x8a33/0x0]
0xffffffffc1f3de83 [stap_01ef9362dd0de8ebe12b09cfab0a1f5_23123+0x9e83/0x0]
0xffffffffc1f402b4 [stap_01ef9362dd0de8ebe12b09cfab0a1f5_23123+0xc2b4/0x0]
0xffffffffc1f3402e [stap_01ef9362dd0de8ebe12b09cfab0a1f5_23123+0x2e/0x0]
0xffffffff81572e50 : kfree_skb+0x70/0xa0 [kernel]
0xffffffff81681f2f : tpacket_rcv+0x5f/0x920 [kernel]
0xffffffff8158673f : dev_hard_start_xmit+0x13f/0x3b0 [kernel]
0xffffffff815af52a : sch_direct_xmit+0x11a/0x240 [kernel]
0xffffffff81589566 : __dev_queue_xmit+0x226/0x550 [kernel]
0xffffffff815898a0 : dev_queue_xmit+0x10/0x20 [kernel]
0xffffffff8159529b : neigh_resolve_output+0x11b/0x220 [kernel]
0xffffffff815cfb17 : ip_finish_output+0x297/0x780 [kernel]
0xffffffff815d0303 : ip_output+0x73/0xe0 [kernel]
0xffffffff815cdf51 : ip_local_out_sk+0x31/0x40 [kernel]
0xffffffff815d0d46 : ip_send_skb+0x16/0x50 [kernel]
0xffffffff815f818c : udp_send_skb+0xac/0x2b0 [kernel]
0xffffffff815f937c : udp_sendmsg+0x32c/0xa00 [kernel]
0xffffffff81606943 : inet_sendmsg+0x63/0xb0 [kernel]
0xffffffff8156a580 : sock_sendmsg+0xb0/0xf0 [kernel]
0xffffffff8156ad37 : ___sys_sendmsg+0x2b7/0x3c0 [kernel]
使用链接 https://www.cnblogs.com/leonxyzh/p/8288339.html 中的 perf 分析和 dropwatch(类似 systemtap)
perf record -g -a -e skb:kfree_skb    # 这一步一定要持续久一点
perf script > /tmp/eee
基本都是上面还有这个,感觉这个栈更像接收完数据包后丢弃
swapper 0 [001] 2770193.170970: skb:kfree_skb: skbaddr=0xffff880450e57400 protocol=2048 location=0xffffffff815c9e24
772e50 kfree_skb (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
7c9e24 ip_rcv_finish (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
7ca686 ip_rcv (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
786f22 __netif_receive_skb_core (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
787188 __netif_receive_skb (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
787210 netif_receive_skb_internal (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
788318 napi_gro_receive (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
2575 virtnet_poll ([virtio_net])
78799d net_rx_action (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
290b4f __do_softirq (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
8b6b1c call_softirq (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
22d3c5 do_softirq (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
290ed5 irq_exit (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
8b76b6 do_IRQ (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
8ac2ad common_interrupt (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
8ab3fe default_idle (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
235006 arch_cpu_idle (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
2e7bda cpu_startup_entry (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
251af6 start_secondary (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
查看内核代码:跟踪该函数,查看目的和源 IP 地址(注意大小端)。发现被丢弃的包的地址是 1.25 和 1.201,经询问,正好是应用程序要发送的包。然后针对该地址的包继续跟踪。下面的小程序用于把 stap 打印出的整数地址转换成点分十进制:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Convert an IPv4 address given as a decimal integer into dotted-quad text.
 *
 * The argument is the raw integer value of an address as printed by a tracing
 * tool (for example 3372329152). The four bytes of that integer are printed in
 * memory order, so on a little-endian host 3372329152 prints 192.168.1.201.
 *
 * Usage:   <program> <decimal-ip>
 * Returns: 0 on success, 1 if the argument is missing or is not a number that
 *          fits in 32 bits.
 */
int main(int argc, char **argv)
{
    char *end = NULL;
    long long parsed;
    unsigned int ui_ip;
    const unsigned char *octets;
    char str_ipaddr[20] = { 0 };

    if (argc < 2) {
        fprintf(stderr, "usage: %s <decimal-ip>\n", argc > 0 ? argv[0] : "ip2str");
        return 1;
    }

    /*
     * atoi() is undefined for values above INT_MAX, which covers half of the
     * IPv4 space. Parse with strtoll and range-check instead. Negative inputs
     * in the 32-bit signed range are still accepted and wrap to unsigned.
     */
    errno = 0;
    parsed = strtoll(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE ||
        parsed < -2147483648LL || parsed > 4294967295LL) {
        fprintf(stderr, "invalid address value: %s\n", argv[1]);
        return 1;
    }

    ui_ip = (unsigned int)parsed;
    octets = (const unsigned char *)&ui_ip;

    snprintf(str_ipaddr, sizeof(str_ipaddr), "%u.%u.%u.%u",
             octets[0], octets[1], octets[2], octets[3]);
    printf("%s", str_ipaddr);
    return 0;
}
# Traces why ip_rcv_finish drops packets for one destination address by
# recording the return values of the input-routing functions it goes through.
#
# The constant 3372329152 is 0xC901A8C0; read byte by byte in memory order on a
# little-endian host that is 192.168.1.201.
global i=0;
# Last values captured by the probes below; printed together when
# ip_rcv_finish returns 1.
global err1=0
global ret1=0;
global ret2=0;
#global s1
#global s2
# Capture the local variable `err` at line 339 of ip_input.c inside ip_rcv_finish.
probe kernel.statement("ip_rcv_finish@net/ipv4/ip_input.c:339") {
err1=$err
}
#probe kernel.function("__mkroute_input").return{
# ret1=$return
# if (ret1 <0 && $daddr==3372329152) {
# printf("ip_mkroute_input %d %s %s %s\n",ret1, $skb$$, $daddr$$, $saddr$$)
# printf("##########\n")
# print_backtrace()
# printf("#########\n")
# }
# s1=$params$
#}
# Print a backtrace whenever ip_mkroute_input is reached for the watched destination.
probe kernel.statement("ip_mkroute_input@net/ipv4/route.c") {
if ($daddr==3372329152) {
printf("ip_mkroute_input##########\n")
print_backtrace()
printf("#########\n")
}
}
# Report failed source validation for the watched destination.
# NOTE(review): `accept_local` has no `$` prefix, unlike every other kernel
# variable used here, so it is presumably an unassigned script variable rather
# than the kernel local. Confirm before relying on the last printed field.
# NOTE(review): `$res` looks like a kernel local read in a .return probe; its
# printed contents may not reflect the state at return time. Confirm against
# the SystemTap documentation on target variables in return probes.
probe kernel.function("__fib_validate_source@net/ipv4/fib_frontend.c").return {
if ($dst==3372329152 && $return < 0) {
printf("__fib_validate_source##########ret %d, %s, %d, %d\n", $return,$res$$,$res->type, accept_local)
print_backtrace()
}
}
# Remember the return value of ip_route_input_slow; on a negative return for
# the watched destination, dump the skb, both addresses and a backtrace.
probe kernel.function("ip_route_input_slow").return{
ret1=$return
if (ret1 <0 && $daddr==3372329152) {
printf("ip_route_input_slow %d %s %s %s %p\n",ret1, $skb$$, $daddr$$, $saddr$$, $in_dev)
printf("##########\n")
print_backtrace()
printf("#########\n")
}
# s1=$params$
}
# Same as above, one level up the call chain: ip_route_input_noref.
probe kernel.function("ip_route_input_noref").return {
ret2=$return
if (ret2 <0 && $daddr==3372329152) {
printf("ip_route_input_noref %d %s %s %s\n", ret2, $skb$$, $daddr$$, $saddr$$)
printf("##########\n")
print_backtrace()
printf("#########\n")
}
# s2=$params$
}
# When ip_rcv_finish returns 1, print the most recently captured values.
# The return value 1 is presumably NET_RX_DROP (TODO confirm).
# The globals are not keyed per packet, so the values printed are whatever the
# probes above stored last.
probe kernel.function( "ip_rcv_finish").return {
if($return==1){
# printf("ip_rcv_finish return 1: %d %d %s\n%s\n", err1,ret1,ret2, kernel_string(s1), kernel_string(s2))
printf("ip_rcv_finish return 1: %d %d %d\n", err1,ret1,ret2)
}
}
内核源码:
https://elixir.bootlin.com/linux/v3.10/source/net/ipv4/fib_frontend.c#L317
最终打印:__fib_validate_source 返回 -22(-EINVAL),且路由类型非 RTN_UNICAST、也未启用 accept_local。上网查资料:这是反向路径过滤(rp_filter)。
ip_mkroute_input##########
0xffffffff815c7a2a : ip_route_input_slow+0x3ca/0xca0 [kernel]
0xffffffff816adfe9 : kretprobe_trampoline+0x0/0x57 [kernel]
#########
__fib_validate_source##########ret -22, {.prefixlen='\000', .nh_sel='\322', .type='\372', .scope='\033', .tclassid=4294936584, .fi=0xffff88081bfad200, .table=0xffff88083fc43af8, .fa_head=0xffff880274f80900}, 250, 0
Returning from: 0xffffffff8160c9d0 : __fib_validate_source.isra.12+0x0/0x400 [kernel]
Returning to : 0xffffffff8160d684 : fib_validate_source+0x64/0xe0 [kernel]
0xffffffff815c7b35 : ip_route_input_slow+0x4d5/0xca0 [kernel]
0xffffffff816adfe9 : kretprobe_trampoline+0x0/0x57 [kernel]
ip_route_input_slow -22 {.next=0x0, .prev=0x0, ={.tstamp={.tv64=0}, .skb_mstamp={={.v64=0, ={.stamp_us=0, .stamp_jiffies=0}}}}, .sk=0x0, .dev=0xffff88081cffb000, .cb="", ._skb_refdst=0, .sp=0x0, .len=76, .data_len=0, .mac_len=14, .hdr_len=0, ={.csum=0, ={.csum_start=0, .csum_offset=0}}, .priority=0, .ignore_df=0, .cloned=0, .ip_summed=1, .nohdr=0, .nfctinfo=2, .pkt_type=0, .fclone=0, .ipvs_property=0, .peeked=0, .nf_trace=0, .protocol=8, .destructor=0x0, .nfct=0xffff8807b1e5c3c0, ...} 3372329152 419539136 0x1901a8c0
##########
Returning from: 0xffffffff815c7660 : ip_route_input_slow+0x0/0xca0 [kernel]
Returning to : 0xffffffff815c8346 : ip_route_input_noref+0x46/0x320 [kernel]
0xffffffff816adfe9 : kretprobe_trampoline+0x0/0x57 [kernel]
0xffff880274f80900
#########
ip_route_input_noref -22 {.next=0x0, .prev=0x0, ={.tstamp={.tv64=0}, .skb_mstamp={={.v64=0, ={.stamp_us=0, .stamp_jiffies=0}}}}, .sk=0x0, .dev=0xffff88081cffb000, .cb="", ._skb_refdst=0, .sp=0x0, .len=76, .data_len=0, .mac_len=14, .hdr_len=0, ={.csum=0, ={.csum_start=0, .csum_offset=0}}, .priority=0, .ignore_df=0, .cloned=0, .ip_summed=1, .nohdr=0, .nfctinfo=2, .pkt_type=0, .fclone=0, .ipvs_property=0, .peeked=0, .nf_trace=0, .protocol=8, .destructor=0x0, .nfct=0xffff8807b1e5c3c0, ...} 3372329152 419539136
##########
Returning from: 0xffffffff815c8300 : ip_route_input_noref+0x0/0x320 [kernel]
Returning to : 0xffffffff815c9d2c : ip_rcv_finish+0xbc/0x350 [kernel]
0xffffffff816adfe9 : kretprobe_trampoline+0x0/0x57 [kernel]
0x8000000000000000
#########
ip_rcv_finish return 1: -22 -22 -22
ip_rcv_finish return 1: -22 -22 -22
ip_rcv_finish return 1: -22 -22 -22
多尝试:有些情况下用 statement,有些用 function;有些要加行号,有些行号不准;有些可以尝试进入子函数 debug。
如何调试自编译KO包:uname -r
cp /root/go/src/unaForward2/src/unaForwardKernel/unaForwardKernel.ko /lib/modules/3.10.0-693.5.2.el7.x86_64/extra/
stap -L 'kernel.statement( "ip_rcv_finish@net/ipv4/ip_input.c+26")'
stap -L 'kernel.statement("__fib_validate_source").return'
stap -L 'kernel.function("__fib_validate_source").return'
stap -gu --all-modules trace2.stap
内核debug包
http://rpm.pbone.net/index.php3?stat=26&dist=94&size=379151628&name=kernel-debuginfo-3.10.0-693.5.2.el7.x86_64.rpm
http://debuginfo.centos.org/7/x86_64/
http://www.rpmfind.net/linux/rpm2html/search.php?query=kernel-debuginfo&submit=Search+...&system=&arch=