- 起因是这样的:研究xv6操作系统的网络协议栈时,在github上找到一个仓库,但它是基于x86的,下载下来编译运行正常。代码中是通过qemu添加tap虚拟接口的方式运行的,于是想看看xv6 RISC-V版本能不能也通过tap模式运行。
- 修改Makefile中关于net的部分如下
# Extra QEMU options that are appended only when LAB is set to "net".
ifeq ($(LAB),net)
# Disabled variant: user-mode netdev with a UDP host forward to guest port 2001.
#QEMUOPTS += -netdev user,id=net0,hostfwd=udp::$(FWDPORT)-:2001 -object filter-dump,id=net0,netdev=net0,file=packets.pcap
# Tap netdev "n1" bound to interface tap0; filter-dump writes its traffic to packets.pcap.
QEMUOPTS += -netdev tap,id=n1,ifname=tap0 -object filter-dump,id=f1,netdev=n1,file=packets.pcap
# e1000 NIC attached to netdev n1 on bus pcie.0.
QEMUOPTS += -device e1000,netdev=n1,bus=pcie.0
endif
- 由于我是在docker环境下进行的实验,运行时发现,docker不开启 --privileged=true 没法打开/dev/net/tun。加上该选项后,每次执行make qemu都要等待很久才能引导操作系统运行。一开始以为准备网络协议栈要很久,就没管。
- 之前通过一个项目编译过linux2.6.26版本的镜像,修改qemu启动参数如下发现启动得很快,想着应该是哪里出了问题。
qemu-system-i386 -nographic -kernel linux-2.6.26/arch/x86/boot/bzImage -initrd initrd.img -append "root=/dev/ram init=/bin/sh console=ttyS0" -netdev user,id=mynet0 -device e1000,netdev=mynet0
qemu-system-i386 -nographic -kernel linux-2.6.26/arch/x86/boot/bzImage -initrd initrd.img -append "root=/dev/ram init=/bin/sh console=ttyS0" -net tap -net nic
排查过程
- 首先通过strace 跟踪指令执行,发现父进程卡在waitpid系统调用。再strace跟踪子进程,发现子进程疯狂调用close系统调用。
[pid 3686] close(6247675) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247676) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247677) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247678) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247679) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247680) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247681) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247682) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247683) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247684) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247685) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247686) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247687) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247688) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247689) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247690) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247691) = -1 EBADF (Bad file descriptor)
[pid 3686] close(6247692) = -1 EBADF (Bad file descriptor)
- 想着用gdb查看一下,但是当时编译qemu4.1的时候没添加调试选项,需要重新编译。研究configure发现有一个配置选项 --enable-debug-info,但是到链接的时候会报各种 undefined reference。于是通过其他选项添加调试信息: ./configure --extra-cflags="-g"。然后替换qemu-system-riscv64。
- 这个时候gdb能看到堆栈了,attach到子进程,堆栈如下。查看源码,发现问题出在执行 /etc/qemu-ifup 脚本之前的那个循环:子进程先获取可打开文件描述符的最大值,然后循环逐个关闭文件描述符。打印open_max发现值约为10亿(1073741816),难怪卡这么久。
(gdb) bt
#0 0x00007f8aa4d715d7 in __close (fd=fd@entry=300095783) at ../sysdeps/unix/sysv/linux/close.c:27
#1 0x000055dd0a64b27e in launch_script (setup_script=setup_script@entry=0x55dd0a97961e "/etc/qemu-ifup",
ifname=ifname@entry=0x7ffd6b5e17c0 "tap0", fd=fd@entry=10, errp=errp@entry=0x7ffd6b5e17b8) at ../net/tap.c:419
#2 0x000055dd0a64cb6b in net_tap_init (ifname_sz=128, tap=0x7ffd6b5e18a0, tap=0x7ffd6b5e18a0, errp=0x7ffd6b5e1a20,
mq_required=<optimized out>, ifname=0x7ffd6b5e17c0 "tap0", setup_script=0x55dd0a97961e "/etc/qemu-ifup",
vnet_hdr=0x7ffd6b5e17ac) at ../net/tap.c:632
#3 net_init_tap (netdev=0x7ffd6b5e1890, name=0x0, peer=0x55dd0b63d500, errp=0x7ffd6b5e1a20) at ../net/tap.c:920
#4 0x000055dd0a63b12c in net_client_init1 (object=<optimized out>, is_netdev=<optimized out>, errp=0x7ffd6b5e1a20)
at ../net/net.c:1053
#5 0x000055dd0a63b7af in net_client_init (opts=<optimized out>, is_netdev=<optimized out>, errp=0x7ffd6b5e1b98)
at ../net/net.c:1153
#6 0x000055dd0a7d8e57 in qemu_opts_foreach (list=0x55dd0aebed60 ,
func=0x55dd0a63b850 , opaque=0x0, errp=0x7ffd6b5e1b98) at util/qemu-option.c:1170
#7 0x000055dd0a63db2a in net_init_clients (errp=0x7ffd6b5e1b98) at ../net/net.c:1574
#8 0x000055dd0a4d9096 in main (argc=20, argv=0x7ffd6b5e1e18, envp=0x7ffd6b5e1ec0) at vl.c:4276
(gdb) f 1
#1 0x000055dd0a64b27e in launch_script (setup_script=setup_script@entry=0x55dd0a97961e "/etc/qemu-ifup",
ifname=ifname@entry=0x7ffd6b5e17c0 "tap0", fd=fd@entry=10, errp=errp@entry=0x7ffd6b5e17b8) at ../net/tap.c:419
419 close(i);
(gdb) p open_max
$1 = 1073741816
// net/tap.c
/*
 * Run the network setup script in a child process and wait for it.
 *
 * setup_script: path of the script to execute; also passed as argv[0].
 * ifname:       interface name, passed to the script as argv[1].
 * fd:           the one descriptor (besides 0-2) left open in the child.
 * errp:         receives an error if fork() fails or the script does not
 *               exit normally with status 0.
 *
 * The call blocks until the child has been reaped by waitpid().
 */
static void launch_script(const char *setup_script, const char *ifname,
int fd, Error **errp)
{
int pid, status;
char *args[3];
char **parg;
/* try to launch network script */
pid = fork();
if (pid < 0) {
error_setg_errno(errp, errno, "could not launch network script %s",
setup_script);
return;
}
if (pid == 0) {
/*
 * Child: close every descriptor from 3 up to the open-file limit,
 * except fd. The loop runs once per possible descriptor number, not
 * per open descriptor, so its cost scales with the limit itself: with
 * a limit around 1e9 it issues about a billion close() calls, nearly
 * all failing with EBADF, before the script is even started.
 */
int open_max = sysconf(_SC_OPEN_MAX), i;
for (i = 3; i < open_max; i++) {
if (i != fd) {
close(i);
}
}
/* argv for the script: { setup_script, ifname, NULL } */
parg = args;
*parg++ = (char *)setup_script;
*parg++ = (char *)ifname;
*parg = NULL;
execv(setup_script, args);
/* Only reached if execv() failed. */
_exit(1);
} else {
/*
 * Parent: keep calling waitpid() until it returns this child's pid;
 * any other return value (including -1) simply retries.
 */
while (waitpid(pid, &status, 0) != pid) {
/* loop */
}
/* Success only on a normal exit with exit code 0. */
if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
return;
}
/* Note: the raw wait status is reported here, not WEXITSTATUS(status). */
error_setg(errp, "network script %s failed with status %d",
setup_script, status);
}
}
- 这下问题就清晰了:是打开文件描述符的最大值过大导致的。docker开启 --privileged=true 的时候会把最大打开文件数上限设置得很大,而正常环境下只有1024。两者的ulimit -a输出对比如下
// docker开启--privileged=true
-t: cpu time (seconds) unlimited
-f: file size (blocks) unlimited
-d: data seg size (kbytes) unlimited
-s: stack size (kbytes) 8192
-c: core file size (blocks) unlimited
-m: resident set size (kbytes) unlimited
-u: processes unlimited
-n: file descriptors 1073741816
-l: locked-in-memory size (kbytes) 8192
-v: address space (kbytes) unlimited
-x: file locks unlimited
-i: pending signals 60824
-q: bytes in POSIX msg queues 819200
-e: max nice 0
-r: max rt priority 0
-N 15: rt cpu time (microseconds) unlimited
// 正常环境
-t: cpu time (seconds) unlimited
-f: file size (blocks) unlimited
-d: data seg size (kbytes) unlimited
-s: stack size (kbytes) 8192
-c: core file size (blocks) unlimited
-m: resident set size (kbytes) unlimited
-u: processes 60824
-n: file descriptors 1024
-l: locked-in-memory size (kbytes) 8192
-v: address space (kbytes) unlimited
-x: file locks unlimited
-i: pending signals 60824
-q: bytes in POSIX msg queues 819200
-e: max nice 0
-r: max rt priority 0
-N 15: rt cpu time (microseconds) unlimited
解决方案