跟踪内核丢包排查

跟踪内核丢包排查


问题背景

unaForwardKernel 转发udp 丢包

结论

192.168.1.2 > 192.168.1.25 > 192.168.1.201

25 上的unaForwardKernel在Pre-routing 将源地址改为了25,过不了反向路由查找:

25 > 201 OK

201> 25 !OK

排查总结

充分利用更多的信息,如源 IP 和目的 IP。sysdig 和 dropwatch 只能找到一堆最后的调用点,无法跟踪整个调用栈,可以试试 systemtap、perf、eBPF。

排查过程

原先没有ip信息,只说明了udp丢包。https://www.cnblogs.com/leonxyzh/p/8288339.html

使用systemtap 脚本 进行初步定位kfree_skb 丢包的调用点

#! /usr/bin/env stap
# Per-call-site statistics of kfree_skb events, keyed by the tracepoint's
# $location value (turned into a symbol name when the counts are reported).
global locations

# Announce start of monitoring.
probe begin
{
  printf("Monitoring for dropped packets\n")
}

# Announce end of monitoring.
probe end
{
  printf("Stopping dropped packet monitor\n")
}

# Record one sample for the location reported by each kfree_skb event.
probe kernel.trace("kfree_skb")
{
  locations[$location] <<< 1
}
# Once per second, print the number of kfree_skb events recorded for each
# call site (largest count first), followed by a separator line.
probe timer.sec(1) {
  foreach (l in locations-) {
    printf("%d packets dropped at %s\n", @count(locations[l]), symname(l))
  }
  printf("*************\n")
  # Reset the statistics so the next report covers only the following
  # second instead of the running total since the script started.
  delete locations
}
# Dump the kernel backtrace of every kfree_skb event, framed by marker lines
# so that individual stacks can be told apart in the output.
probe kernel.trace("kfree_skb")
{
  printf("##########\n")
  print_backtrace()
  printf("#########\n")
}

282 packets dropped at tpacket_rcv
91 packets dropped at ip_rcv_finish
70 packets dropped at tcp_v4_rcv
26 packets dropped at unix_stream_connect
12 packets dropped at unix_dgram_sendmsg
3 packets dropped at sk_stream_kill_queues
3 packets dropped at tcp_rcv_state_process
1 packets dropped at ip6_mc_input
1 packets dropped at inet_frag_destroy

结果有好几个函数,到底是哪个函数无法确定,对比实验前和实验后 没有明显差别。除非加快实验丢包的频率。

搜索调用栈中的udp关键字, 但是从栈逻辑看 是已经发送完了http://blog.csdn.net/wdscq1234/article/details/51986189

0xffffffffc1f3ca33 [stap_01ef9362dd0de8ebe12b09cfab0a1f5_23123+0x8a33/0x0]
 0xffffffffc1f3de83 [stap_01ef9362dd0de8ebe12b09cfab0a1f5_23123+0x9e83/0x0]
 0xffffffffc1f402b4 [stap_01ef9362dd0de8ebe12b09cfab0a1f5_23123+0xc2b4/0x0]
 0xffffffffc1f3402e [stap_01ef9362dd0de8ebe12b09cfab0a1f5_23123+0x2e/0x0]
 0xffffffff81572e50 : kfree_skb+0x70/0xa0 [kernel]
 0xffffffff81681f2f : tpacket_rcv+0x5f/0x920 [kernel]
 0xffffffff8158673f : dev_hard_start_xmit+0x13f/0x3b0 [kernel]
 0xffffffff815af52a : sch_direct_xmit+0x11a/0x240 [kernel]
 0xffffffff81589566 : __dev_queue_xmit+0x226/0x550 [kernel]
 0xffffffff815898a0 : dev_queue_xmit+0x10/0x20 [kernel]
 0xffffffff8159529b : neigh_resolve_output+0x11b/0x220 [kernel]
 0xffffffff815cfb17 : ip_finish_output+0x297/0x780 [kernel]
 0xffffffff815d0303 : ip_output+0x73/0xe0 [kernel]
 0xffffffff815cdf51 : ip_local_out_sk+0x31/0x40 [kernel]
 0xffffffff815d0d46 : ip_send_skb+0x16/0x50 [kernel]
 0xffffffff815f818c : udp_send_skb+0xac/0x2b0 [kernel]
 0xffffffff815f937c : udp_sendmsg+0x32c/0xa00 [kernel]
 0xffffffff81606943 : inet_sendmsg+0x63/0xb0 [kernel]
 0xffffffff8156a580 : sock_sendmsg+0xb0/0xf0 [kernel]
 0xffffffff8156ad37 : ___sys_sendmsg+0x2b7/0x3c0 [kernel]

使用链接 https://www.cnblogs.com/leonxyzh/p/8288339.html 中的 perf 分析和 dropwatch(类似 systemtap)

perf record -g -a -e skb:kfree_skb  # 这一步一定要持续久一点
perf script > /tmp/eee

基本都是上面还有这个,感觉这个栈更像接收完数据包后丢弃

swapper     0 [001] 2770193.170970: skb:kfree_skb: skbaddr=0xffff880450e57400 protocol=2048 location=0xffffffff815c9e24
                  772e50 kfree_skb (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  7c9e24 ip_rcv_finish (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  7ca686 ip_rcv (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  786f22 __netif_receive_skb_core (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  787188 __netif_receive_skb (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  787210 netif_receive_skb_internal (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  788318 napi_gro_receive (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                    2575 virtnet_poll ([virtio_net])
                  78799d net_rx_action (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  290b4f __do_softirq (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  8b6b1c call_softirq (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  22d3c5 do_softirq (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  290ed5 irq_exit (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  8b76b6 do_IRQ (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  8ac2ad common_interrupt (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  8ab3fe default_idle (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  235006 arch_cpu_idle (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  2e7bda cpu_startup_entry (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)
                  251af6 start_secondary (/usr/lib/debug/lib/modules/3.10.0-693.5.2.el7.x86_64/vmlinux)

查看内核代码:跟踪该函数,查看目的和源Ip地址,注意大小端:找到1.25 和1.201 的地址被丢弃,询问正好是应用程序要发送的包。然后针对该地址的包跟踪

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Print the dotted-quad form of a raw 32-bit IPv4 value.
 *
 * argv[1] is the decimal value of an address as it appears when the 32-bit
 * address field is read as a plain integer (e.g. the $daddr/$saddr values
 * printed by the tracing scripts). The four bytes are printed in memory
 * order, so on a little-endian host 3372329152 prints 192.168.1.201.
 *
 * Returns 0 on success, 1 when the argument is missing or is not a decimal
 * number that fits in 32 bits.
 */
int main(int argc,char **argv)
{
	char str_ipaddr[20] = { 0 };
	const unsigned char *ptr_uc;
	unsigned long raw;
	uint32_t ui_ip;
	char *end = NULL;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <ip-as-decimal-u32>\n",
			argc > 0 ? argv[0] : "ip2str");
		return 1;
	}

	/*
	 * strtoul instead of atoi: the values of interest exceed INT_MAX,
	 * and atoi offers no way to detect malformed input.
	 */
	errno = 0;
	raw = strtoul(argv[1], &end, 10);
	if (argv[1][0] < '0' || argv[1][0] > '9' || *end != '\0' ||
	    errno == ERANGE || raw > UINT32_MAX) {
		fprintf(stderr, "invalid 32-bit value: %s\n", argv[1]);
		return 1;
	}

	ui_ip = (uint32_t)raw;
	/* Walk the value byte by byte in memory order. */
	ptr_uc = (const unsigned char *)&ui_ip;
	snprintf(str_ipaddr, sizeof(str_ipaddr), "%u.%u.%u.%u",
		 ptr_uc[0], ptr_uc[1], ptr_uc[2], ptr_uc[3]);
	printf("%s", str_ipaddr);

	return 0;
}

# State shared between the probes below; err1/ret1/ret2 are printed together
# when ip_rcv_finish returns 1.
global i = 0       # not referenced by any probe visible in this script
global err1 = 0    # $err captured inside ip_rcv_finish
global ret1 = 0    # latest return value of ip_route_input_slow
global ret2 = 0    # latest return value of ip_route_input_noref
#global s1
#global s2

# Save the value of the local variable `err` as seen at line 339 of
# net/ipv4/ip_input.c (inside ip_rcv_finish) for later reporting.
probe kernel.statement("ip_rcv_finish@net/ipv4/ip_input.c:339")
{
	err1 = $err
}

#probe kernel.function("__mkroute_input").return{
#        ret1=$return
#        if (ret1 <0 && $daddr==3372329152) {
#                printf("ip_mkroute_input %d %s %s %s\n",ret1, $skb$$, $daddr$$, $saddr$$)
#                printf("##########\n")
#                print_backtrace()
#                printf("#########\n")
#        }
#       s1=$params$
#}

# Print a backtrace whenever ip_mkroute_input is reached for the traced
# destination. 3372329152 is 0xC901A8C0, i.e. the bytes c0.a8.01.c9
# (192.168.1.201) when laid out in little-endian memory order.
probe kernel.statement("ip_mkroute_input@net/ipv4/route.c")
{
	if ($daddr != 3372329152)
		next

	printf("ip_mkroute_input##########\n")
	print_backtrace()
	printf("#########\n")
}

# Report failing source validation for the traced destination
# (3372329152 is 0xC901A8C0, bytes c0.a8.01.c9 = 192.168.1.201 in
# little-endian memory order): return code, the fib_result, its type, and
# accept_local, followed by a backtrace.
#
# accept_local must be read as the kernel variable ($accept_local); a bare
# `accept_local` is an unassigned script-local and always prints 0.
# @defined() keeps the script compiling when the variable cannot be
# resolved (the function may be an optimized clone); -1 marks that case.
#
# NOTE(review): in a .return probe, $-variables other than $return are
# presumably taken from a function-entry snapshot, so function locals such
# as res and accept_local may not hold the values used by the failing
# check -- confirm, or use a statement probe at the check itself.
probe kernel.function("__fib_validate_source@net/ipv4/fib_frontend.c").return {
        if ($dst==3372329152 && $return < 0) {
                printf("__fib_validate_source##########ret %d, %s, %d, %d\n", $return,$res$$,$res->type,
                       @defined($accept_local) ? $accept_local : -1)
                print_backtrace()
        }
}

# Record the return value of ip_route_input_slow in ret1. When it is
# negative for the traced destination (3372329152 = 192.168.1.201 in
# little-endian memory order), print the skb, both addresses, in_dev and a
# backtrace.
probe kernel.function("ip_route_input_slow").return
{
	ret1 = $return
	if (ret1 >= 0 || $daddr != 3372329152)
		next

	printf("ip_route_input_slow %d %s %s %s %p\n",ret1, $skb$$, $daddr$$, $saddr$$, $in_dev)
	printf("##########\n")
	print_backtrace()
	printf("#########\n")
#	s1=$params$
}
# Record the return value of ip_route_input_noref in ret2. When it is
# negative for the traced destination (3372329152 = 192.168.1.201 in
# little-endian memory order), print the skb, both addresses and a
# backtrace.
probe kernel.function("ip_route_input_noref").return
{
	ret2 = $return
	if (ret2 >= 0 || $daddr != 3372329152)
		next

	printf("ip_route_input_noref %d %s %s %s\n", ret2, $skb$$, $daddr$$, $saddr$$)
	printf("##########\n")
	print_backtrace()
	printf("#########\n")
#	s2=$params$
}

# When ip_rcv_finish returns 1, print the values collected by the other
# probes: err1, ret1 and ret2.
probe kernel.function( "ip_rcv_finish").return
{
	if ($return != 1)
		next

#	printf("ip_rcv_finish return 1: %d %d %s\n%s\n", err1,ret1,ret2, kernel_string(s1), kernel_string(s2))
	printf("ip_rcv_finish return 1: %d %d %d\n", err1,ret1,ret2)
}

内核源码:

https://elixir.bootlin.com/linux/v3.10/source/net/ipv4/fib_frontend.c#L317

最终打印:__fib_validate_source 返回 -22(-EINVAL),且路由类型非 RTN_UNICAST、也未开启 accept_local。上网查资料:属于反向路径过滤(源地址校验)

ip_mkroute_input##########
 0xffffffff815c7a2a : ip_route_input_slow+0x3ca/0xca0 [kernel]
 0xffffffff816adfe9 : kretprobe_trampoline+0x0/0x57 [kernel]
#########
__fib_validate_source##########ret -22, {.prefixlen='\000', .nh_sel='\322', .type='\372', .scope='\033', .tclassid=4294936584, .fi=0xffff88081bfad200, .table=0xffff88083fc43af8, .fa_head=0xffff880274f80900}, 250, 0
Returning from:  0xffffffff8160c9d0 : __fib_validate_source.isra.12+0x0/0x400 [kernel]
Returning to  :  0xffffffff8160d684 : fib_validate_source+0x64/0xe0 [kernel]
 0xffffffff815c7b35 : ip_route_input_slow+0x4d5/0xca0 [kernel]
 0xffffffff816adfe9 : kretprobe_trampoline+0x0/0x57 [kernel]
ip_route_input_slow -22 {.next=0x0, .prev=0x0, ={.tstamp={.tv64=0}, .skb_mstamp={={.v64=0, ={.stamp_us=0, .stamp_jiffies=0}}}}, .sk=0x0, .dev=0xffff88081cffb000, .cb="", ._skb_refdst=0, .sp=0x0, .len=76, .data_len=0, .mac_len=14, .hdr_len=0, ={.csum=0, ={.csum_start=0, .csum_offset=0}}, .priority=0, .ignore_df=0, .cloned=0, .ip_summed=1, .nohdr=0, .nfctinfo=2, .pkt_type=0, .fclone=0, .ipvs_property=0, .peeked=0, .nf_trace=0, .protocol=8, .destructor=0x0, .nfct=0xffff8807b1e5c3c0, ...} 3372329152 419539136 0x1901a8c0
##########
Returning from:  0xffffffff815c7660 : ip_route_input_slow+0x0/0xca0 [kernel]
Returning to  :  0xffffffff815c8346 : ip_route_input_noref+0x46/0x320 [kernel]
 0xffffffff816adfe9 : kretprobe_trampoline+0x0/0x57 [kernel]
 0xffff880274f80900
#########
ip_route_input_noref -22 {.next=0x0, .prev=0x0, ={.tstamp={.tv64=0}, .skb_mstamp={={.v64=0, ={.stamp_us=0, .stamp_jiffies=0}}}}, .sk=0x0, .dev=0xffff88081cffb000, .cb="", ._skb_refdst=0, .sp=0x0, .len=76, .data_len=0, .mac_len=14, .hdr_len=0, ={.csum=0, ={.csum_start=0, .csum_offset=0}}, .priority=0, .ignore_df=0, .cloned=0, .ip_summed=1, .nohdr=0, .nfctinfo=2, .pkt_type=0, .fclone=0, .ipvs_property=0, .peeked=0, .nf_trace=0, .protocol=8, .destructor=0x0, .nfct=0xffff8807b1e5c3c0, ...} 3372329152 419539136
##########
Returning from:  0xffffffff815c8300 : ip_route_input_noref+0x0/0x320 [kernel]
Returning to  :  0xffffffff815c9d2c : ip_rcv_finish+0xbc/0x350 [kernel]
 0xffffffff816adfe9 : kretprobe_trampoline+0x0/0x57 [kernel]
 0x8000000000000000
#########
ip_rcv_finish return 1: -22 -22 -22
ip_rcv_finish return 1: -22 -22 -22
ip_rcv_finish return 1: -22 -22 -22

多尝试:有些情况下用 statement,有些用 function;有些要加行号,有些行号不准;有些可以尝试进入子函数 debug

如何调试自编译KO包:uname -r

cp /root/go/src/unaForward2/src/unaForwardKernel/unaForwardKernel.ko /lib/modules/3.10.0-693.5.2.el7.x86_64/extra/

# List the probe points matching each pattern, together with the variables
# available there (-L), before writing the real probes.
stap -L 'kernel.statement( "ip_rcv_finish@net/ipv4/ip_input.c+26")'
stap -L 'kernel.statement("__fib_validate_source").return'
stap -L 'kernel.function("__fib_validate_source").return'
# Run the tracing script: -g guru mode, -u unoptimized translation,
# --all-modules to include symbol/unwind data for all loaded modules.
stap -gu --all-modules trace2.stap

内核debug包

http://rpm.pbone.net/index.php3?stat=26&dist=94&size=379151628&name=kernel-debuginfo-3.10.0-693.5.2.el7.x86_64.rpm

http://debuginfo.centos.org/7/x86_64/

http://www.rpmfind.net/linux/rpm2html/search.php?query=kernel-debuginfo&submit=Search+...&system=&arch=

你可能感兴趣的:(内核-系统)