linux内核 fault-injection

实验环境raspi 4b

一、Fault注入功能基础架构

See also drivers/md/md-faulty.c and “every_nth” module option for scsi_debug.
Available fault injection capabilities

  • failslab
    injects slab allocation failures. (kmalloc(), kmem_cache_alloc(), …)

  • fail_page_alloc
    injects page allocation failures. (alloc_pages(), get_free_pages(), …)

  • fail_usercopy
    injects failures in user memory access functions. (copy_from_user(), get_user(), …)

  • fail_futex
    injects futex deadlock and uaddr fault errors.

  • fail_sunrpc
    injects kernel RPC client and server failures.

  • fail_make_request
    injects disk IO errors on devices permitted by setting
    /sys/block/<device>/make-it-fail or
    /sys/block/<device>/<partition>/make-it-fail. (submit_bio_noacct())

  • fail_mmc_request
    injects MMC data errors on devices permitted by setting
    debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request

  • fail_function
    injects error return on specific functions, which are marked by
    ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
    under /sys/kernel/debug/fail_function. No boot option supported.

  • NVMe fault injection
    inject NVMe status code and retry flag on devices permitted by setting
    debugfs entries under /sys/kernel/debug/nvme*/fault_inject. The default
    status code is NVME_SC_INVALID_OPCODE with no retry. The status code and
    retry flag can be set via the debugfs.

二、打开内核相关调试选项

看看系统中有没有这个文件,从结果来看,并没有,有什么特殊的编译选项需要开启?

root@runninglinuxkernel:~# ls -alh /sys/block/vda
lrwxrwxrwx 1 root root 0 Mar 13 23:59 /sys/block/vda -> ../devices/pci0000:00/0000:00:03.0/virtio0/block/vda
root@runninglinuxkernel:~# ls -alh /sys/block/vda/
total 0
drwxr-xr-x 8 root root    0 Mar 13 23:59 .
drwxr-xr-x 3 root root    0 Mar 13 23:59 ..
-r--r--r-- 1 root root 4.0K Mar 14 00:02 alignment_offset
lrwxrwxrwx 1 root root    0 Mar 14 00:02 bdi -> ../../../../../virtual/bdi/254:0
-rw-r--r-- 1 root root 4.0K Mar 14 00:02 cache_type
-r--r--r-- 1 root root 4.0K Mar 14 00:02 capability
-r--r--r-- 1 root root 4.0K Mar 14 00:02 dev
lrwxrwxrwx 1 root root    0 Mar 14 00:02 device -> ../../../virtio0
-r--r--r-- 1 root root 4.0K Mar 14 00:02 discard_alignment
-r--r--r-- 1 root root 4.0K Mar 14 00:02 ext_range
-r--r--r-- 1 root root 4.0K Mar 14 00:02 hidden
drwxr-xr-x 2 root root    0 Mar 14 00:02 holders
-r--r--r-- 1 root root 4.0K Mar 14 00:02 inflight
drwxr-xr-x 3 root root    0 Mar 13 23:59 mq
drwxr-xr-x 2 root root    0 Mar 14 00:02 power
drwxr-xr-x 3 root root    0 Mar 14 00:02 queue
-r--r--r-- 1 root root 4.0K Mar 14 00:02 range
-r--r--r-- 1 root root 4.0K Mar 14 00:02 removable
-r--r--r-- 1 root root 4.0K Mar 14 00:02 ro
-r--r--r-- 1 root root 4.0K Mar 13 23:59 serial
-r--r--r-- 1 root root 4.0K Mar 14 00:02 size
drwxr-xr-x 2 root root    0 Mar 13 23:59 slaves
-r--r--r-- 1 root root 4.0K Mar 14 00:02 stat
lrwxrwxrwx 1 root root    0 Mar 13 23:59 subsystem -> ../../../../../../class/block
drwxr-xr-x 2 root root    0 Mar 14 00:02 trace
-rw-r--r-- 1 root root 4.0K Mar 13 23:59 uevent

那就查查配置文件中FAULT_INJECT相关的字段。

rlk@rlk:runninglinuxkernel_5.0$ grep "FAULT_INJECT" * -r
arch/riscv/configs/busybox_defconfig:# CONFIG_FAULT_INJECTION is not set
arch/riscv/configs/debian_defconfig:# CONFIG_FAULT_INJECTION is not set
arch/x86/configs/debian_defconfig:# CONFIG_FAULT_INJECTION is not set
arch/arm64/configs/busybox_defconfig:# CONFIG_F2FS_FAULT_INJECTION is not set
arch/arm64/configs/busybox_defconfig:# CONFIG_FAULT_INJECTION is not set
arch/arm64/configs/debian_default_defconfig:# CONFIG_DRBD_FAULT_INJECTION is not set
arch/arm64/configs/debian_default_defconfig:# CONFIG_F2FS_FAULT_INJECTION is not set
arch/arm64/configs/debian_default_defconfig:# CONFIG_NFSD_FAULT_INJECTION is not set
arch/arm64/configs/debian_default_defconfig:# CONFIG_FAULT_INJECTION is not set
arch/arm64/configs/debian_defconfig:# CONFIG_F2FS_FAULT_INJECTION is not set
arch/arm64/configs/debian_defconfig:# CONFIG_FAULT_INJECTION is not set
  • 将故障注入相关内核编译选项打开。
CONFIG_FUNCTION_ERROR_INJECTION=y
CONFIG_FAULT_INJECTION=y
CONFIG_FAILSLAB=y
CONFIG_FAIL_PAGE_ALLOC=y
CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAIL_FUTEX=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y

linux内核 fault-injection_第1张图片
有可能在.config文件中找不到对应的配置选项,需要在menuconfig中输入/查找,然后将对应的选项打开。
linux内核 fault-injection_第2张图片

  • 重新编译树莓派内核。
make KERNEL=kernel8 ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- Image modules dtbs -j24
  • 重新编译内核源码之后,再次查看已经有磁盘IO相关的故障注入相关选项。
curtis@raspberrypi:~ $ ll /sys/class/block/sda/
total 0
-r--r--r-- 1 root root 4096 Jun 29 15:16 alignment_offset
lrwxrwxrwx 1 root root    0 Jun 29 15:16 bdi -> ../../../../../../../../../../../../../../virtual/bdi/8:0
-r--r--r-- 1 root root 4096 Jun 29 15:16 capability
-r--r--r-- 1 root root 4096 Jun 29 14:17 dev
lrwxrwxrwx 1 root root    0 Jun 29 15:16 device -> ../../../0:0:0:0
-r--r--r-- 1 root root 4096 Jun 29 15:16 discard_alignment
-r--r--r-- 1 root root 4096 Jun 29 15:16 diskseq
-r--r--r-- 1 root root 4096 Jun 29 15:16 events
-r--r--r-- 1 root root 4096 Jun 29 15:16 events_async
-rw-r--r-- 1 root root 4096 Jun 29 15:16 events_poll_msecs
-r--r--r-- 1 root root 4096 Jun 29 15:16 ext_range
-r--r--r-- 1 root root 4096 Jun 29 15:16 hidden
drwxr-xr-x 2 root root    0 Jun 29 15:16 holders
-r--r--r-- 1 root root 4096 Jun 29 15:16 inflight
drwxr-xr-x 2 root root    0 Jun 29 15:16 integrity
-rw-r--r-- 1 root root 4096 Jun 29 15:16 io-timeout-fail
-rw-r--r-- 1 root root 4096 Jun 29 15:16 make-it-fail
drwxr-xr-x 3 root root    0 Jun 29 15:16 mq
drwxr-xr-x 2 root root    0 Jun 29 15:16 power
drwxr-xr-x 3 root root    0 Jun 29 14:17 queue
-r--r--r-- 1 root root 4096 Jun 29 15:16 range
-r--r--r-- 1 root root 4096 Jun 29 14:17 removable
-r--r--r-- 1 root root 4096 Jun 29 14:24 ro
drwxr-xr-x 5 root root    0 Jun 29 14:17 sda1
drwxr-xr-x 5 root root    0 Dec 22  2022 sda2
-r--r--r-- 1 root root 4096 Jun 29 14:24 size
drwxr-xr-x 2 root root    0 Jun 29 14:17 slaves
-r--r--r-- 1 root root 4096 Jun 29 14:24 stat
lrwxrwxrwx 1 root root    0 Jun 29 14:17 subsystem -> ../../../../../../../../../../../../../../../class/block
drwxr-xr-x 2 root root    0 Jun 29 15:16 trace
-rw-r--r-- 1 root root 4096 Dec 22  2022 uevent

已经具备故障注入的能力,应该怎么使用??

root@raspberrypi:/home/curtis# ll /sys/kernel/debug/fail
fail_function/     fail_futex/        fail_io_timeout/   fail_make_request/ fail_page_alloc/   failslab/          fail_usercopy/

root@raspberrypi:/home/curtis# ll /sys/kernel/debug/fail_make_request/
interval                       reject-start                   space                          times                          verbose_ratelimit_interval_ms
probability                    require-end                    stacktrace-depth               verbose
reject-end                     require-start                  task-filter                    verbose_ratelimit_burst

以上这些是具体的故障的控制节点。

三、内核fault-injection行为控制

3.1 debugfs 入口

fault-inject-debugfs内核模块提供一些debugfs的入口以便在运行时配置fault-injection能力。

  • /sys/kernel/debug/fail*/probability:
    故障注入的可能性,以百分比为单位。
    单位:percent(百分比)
    需要特别注意的是,对于某个测试用例而言,当probability=100时,可以通过设置/sys/kernel/debug/fail*/interval来控制错误的间隔时间。

  • /sys/kernel/debug/fail*/interval:
    指定两次故障注入之间的间隔。单位是“调用次数”而不是时间:只统计通过了其他所有过滤条件的should_fail()调用,每隔interval次这样的调用才可能注入一次故障。
    注意:如果使能该选项,需要设定interval>1,设置该选项一般是在probability=100时使用。

  • /sys/kernel/debug/fail*/times:
    指定故障产生的最大次数,如果设置为-1,意味着没有上限,将会一直出错。

  • /sys/kernel/debug/fail*/space:
    设置异常的size余量,每次执行到故障注入点后,都会将在该space的基础上递减size值,直到该值降低为0后才会注入异常。其中size的含义对各种异常各不相同,对于IO异常表示的是本次IO的字节数,对于内存分配表示的是内存的大小。默认值为0。

  • /sys/kernel/debug/fail*/verbose
    Format: { 0 | 1 | 2 }
    指定故障注入时相关信息的详细程度,0意味着没有任何信息,1每个failure只会打印一行调试信息,2将会打印函数调用trace - 在对故障注入能力做调试使用。(从实际的使用结果来看,默认值为2)

  • /sys/kernel/debug/fail*/task-filter:
    Format: { ‘Y’ | ‘N’ }
    设置进程过滤,N表示不过滤;Y表示只对设置了/proc/<pid>/make-it-fail=1的进程注入故障(中断上下文中的流程不注入)。默认值为N。

  • /sys/kernel/debug/fail*/require-start,

  • /sys/kernel/debug/fail*/require-end,

  • /sys/kernel/debug/fail*/reject-start,

  • /sys/kernel/debug/fail*/reject-end:
    设置调用流程的虚拟地址空间过滤。若调用流程涉及的代码段(Text段)包含在require-start -> require-end且不包含在reject-start -> reject-end中才注入异常,可以用来设置故障注入只针对某个或某些模块执行。默认required范围为[0, ULONG_MAX)(即整个虚拟地址空间),rejected范围为[0, 0)。

  • /sys/kernel/debug/fail*/stacktrace-depth:
    设置[require-start, require-end) 和[reject-start, reject-end)跟踪的调用深度。默认值为32。

  • /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem:
    格式:{ ‘Y’ | ‘N’ }
    设置页分配的高端内存过滤,设置为Y后当分配的内存类型包含__GFP_HIGHMEM(高端内存)不启用故障注入。默认值为N。

  • /sys/kernel/debug/failslab/ignore-gfp-wait:

  • /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait:
    格式:{ ‘Y’ | ‘N’ }
    设置内存分配的分配模式过滤,设置为Y后只对非睡眠的内存分配启用故障注入(GFP_ATOMIC)。默认值为N。

  • /sys/kernel/debug/fail_page_alloc/min-order:
    设置页分配order的过滤限制,当内核分配页小于该设定值则不进行故障注入。默认值为1

  • /sys/kernel/debug/fail_futex/ignore-private:
    格式:{ ‘Y’ | ‘N’ }
    默认为“N”,设置为“Y”后,在处理私有(同一地址空间内的)futex 时不进行故障注入。

  • /sys/kernel/debug/fail_sunrpc/ignore-client-disconnect:
    格式:{ ‘Y’ | ‘N’ }
    默认为“N”,设置为“Y”后,不在 RPC 客户端上注入断开连接故障。

  • /sys/kernel/debug/fail_sunrpc/ignore-server-disconnect:
    格式:{ ‘Y’ | ‘N’ }
    默认为“N”,设置为“Y”后,不在 RPC 服务器上注入断开连接故障。

  • /sys/kernel/debug/fail_sunrpc/ignore-cache-wait:

    格式:{ ‘Y’ | ‘N’ }

    默认为“N”,设置为“Y”后,不在 RPC 服务器上注入缓存等待(cache wait)故障。

  • /sys/kernel/debug/fail_function/inject:

    格式:{ ‘函数名’ | ‘!函数名’ | ‘’ }

    通过名称指定错误注入的目标函数,如果函数名称前导 ‘!’ 前缀,给定的函数是从注入列表中删除。如果没有指定 (‘’),注入列表被清除。

  • /sys/kernel/debug/fail_function/injectable:

    (只读)显示可进行错误注入的函数,以及每个函数可以指定哪种类型的错误值。错误类型为以下之一:

    • NULL:retval 必须为 0。
    • ERRNO:retval 必须为 -1 到 -MAX_ERRNO (-4096)。
    • ERR_NULL:retval 必须是 0 或 -1 到 -MAX_ERRNO (-4096)。
  • /sys/kernel/debug/fail_function/<函数名>/retval:

    指定要注入给定函数的“错误”返回值,这将在用户指定新的注入条目时创建。
    请注意,此文件仅接受无符号值。所以,如果你想使用负 errno,你最好使用 ‘printf’ 而不是 ‘echo’,例如:
    $ printf %#x -12 > retval

3.2 故障注入启动参数配置

前文中提到的debugfs接口只有在debugfs启用后才有效,对于在内核启动阶段或没有设置debugfs配置选项的情况,Fault-injection的默认配置值通过启动参数进行传递,包括以下:

failslab=
fail_page_alloc=
fail_make_request=
fail_futex=
mmc_core.fail_request=<interval>,<probability>,<space>,<times>

通过启动参数传入的参数有限,目前只能接受interval、probability、space和times这4个参数(其他参数会被内核设置为默认的值),但是在一般情况下也够用了。

例如:如果想在内核启动阶段就启用failslab 100%无限故障注入,则可以传入内核启动参数:

failslab=1,100,0,-1

proc 条目(Proc entries)
^^^^^^^^^^^^

  • /proc/<pid>/fail-nth,
    /proc/self/task/<tid>/fail-nth:

    向该文件写入整数 N,会使该任务中的第 N 次调用失败。
    读取此文件返回一个整数值:“0”表示先前写入此文件所设置的故障已经被注入;
    正整数 N 表示该故障尚未被注入。
    请注意,此文件对所有类型的故障(slab、futex 等)都生效。
    此设置优先于所有其他通用 debugfs 设置(例如 probability、interval、times 等),
    但各故障类型专有的设置(例如 fail_futex/ignore-private)优先于它。

    此功能用于对单个系统调用中的各个故障点进行系统化测试。
    请参见下面的示例。

四、添加新的故障注入能力

  • #include <linux/fault-inject.h>

-定义故障属性

DECLARE_FAULT_ATTR(name);

请看fault-inject.h中struct fault_attr的定义了解详情。

-提供一种配置故障属性的方法

-引导选项

如果您需要从启动时启用故障注入功能,您可以
提供引导选项来配置它。它有一个辅助函数:

setup_fault_attr(attr, str);
  • debugfs 条目

    failslab、fail_page_alloc、fail_usercopy 和 fail_make_request 使用这种方式。
    辅助函数:

    fault_create_debugfs_attr(name, parent, attr);

-模块参数

如果故障注入能力的范围限制在单个内核模块内,最好提供模块参数来配置故障属性。

-添加一个钩子来插入失败

在 should_fail() 返回 true 时,客户端代码应该注入一个失败:

should_fail(attr, size);

五、应用实例

Inject slab allocation failures into module init/exit code.

#!/bin/bash

# Configure the failslab attributes under /sys/kernel/debug/failslab.
# Each entry is "<attribute>=<value>"; they are written in the listed order.
FAILTYPE=failslab
for setting in task-filter=Y probability=10 interval=100 times=-1 space=0 \
		verbose=2 ignore-gfp-wait=Y
do
	echo "${setting#*=}" > "/sys/kernel/debug/$FAILTYPE/${setting%%=*}"
done

# Run a command with fault injection enabled for its process.
#
# A fresh bash writes 1 to its own /proc/self/make-it-fail and then exec's
# the command, so the command runs in the process that was just marked.
#
# Arguments: "$@" - the command followed by its arguments
# Returns:   exit status of the command (or of the failed write)
faulty_system()
{
	# Hand the arguments over as positional parameters instead of pasting
	# them into the script text: that way each argument reaches the command
	# unchanged, even if it contains whitespace or shell metacharacters.
	bash -c 'echo 1 > /proc/self/make-it-fail && exec "$@"' faulty_system "$@"
}

# At least one module name is required.
if [ $# -eq 0 ]
then
	echo "Usage: $0 modulename [ modulename ... ]"
	exit 1
fi

# Load and then unload every module named on the command line, each time
# through faulty_system so the modprobe process has fault injection enabled.
# "$@" keeps every argument as one word (no word splitting, no globbing).
for m in "$@"
do
	echo "inserting $m..."
	faulty_system modprobe "$m"

	echo "removing $m..."
	faulty_system modprobe -r "$m"
done

Inject page allocation failures only for a specific module::

    #!/bin/bash
    #
    # Inject page allocation failures only for one kernel module.
    # Usage: <this script> <modulename>
    #
    # The module is loaded, the require-start/require-end address filter is
    # set from the module's section addresses, and injection stays armed
    # until the script is interrupted.

    FAILTYPE=fail_page_alloc
    module=$1

    if [ -z "$module" ]
    then
        echo "Usage: $0 <modulename>"
        exit 1
    fi

    modprobe "$module"

    # /sys/module/<name>/sections only exists while the module is loaded.
    if [ ! -d "/sys/module/$module/sections" ]
    then
        echo "Module $module is not loaded"
        exit 1
    fi

    # Address filter: the range starts at the module's .text address and
    # ends at its .data address.
    cat "/sys/module/$module/sections/.text" > /sys/kernel/debug/$FAILTYPE/require-start
    cat "/sys/module/$module/sections/.data" > /sys/kernel/debug/$FAILTYPE/require-end

    echo N > /sys/kernel/debug/$FAILTYPE/task-filter
    echo 10 > /sys/kernel/debug/$FAILTYPE/probability
    echo 100 > /sys/kernel/debug/$FAILTYPE/interval
    echo -1 > /sys/kernel/debug/$FAILTYPE/times
    echo 0 > /sys/kernel/debug/$FAILTYPE/space
    echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
    echo Y > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
    echo Y > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem
    echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth

    # Disarm injection (probability 0) on any exit path.
    trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT

    echo "Injecting errors into the module $module... (interrupt to stop)"
    sleep 1000000

Inject open_ctree error while btrfs mount::

    #!/bin/bash
    #
    # Make open_ctree() return -12 (-ENOMEM) through fail_function and check
    # that mounting a freshly created btrfs image fails as a result.

    rm -f testfile.img
    dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1

    # Without a loop device every later step would run with an empty
    # argument, so stop here if losetup did not print a device name.
    DEVICE=$(losetup --show -f testfile.img)
    if [ -z "$DEVICE" ]
    then
        echo "losetup failed for testfile.img" >&2
        rm -f testfile.img
        exit 1
    fi

    mkfs.btrfs -f "$DEVICE"
    mkdir -p tmpmnt

    FAILTYPE=fail_function
    FAILFUNC=open_ctree
    echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
    # retval only accepts unsigned values, so -12 is written in hex via printf.
    printf %#x -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
    echo N > /sys/kernel/debug/$FAILTYPE/task-filter
    echo 100 > /sys/kernel/debug/$FAILTYPE/probability
    echo 0 > /sys/kernel/debug/$FAILTYPE/interval
    echo -1 > /sys/kernel/debug/$FAILTYPE/times
    echo 0 > /sys/kernel/debug/$FAILTYPE/space
    echo 1 > /sys/kernel/debug/$FAILTYPE/verbose

    # The test succeeds when the mount fails, i.e. the error was injected.
    if ! mount -t btrfs "$DEVICE" tmpmnt
    then
        echo "SUCCESS!"
    else
        echo "FAILED!"
        umount tmpmnt
    fi

    # An empty write clears the injection list.
    echo > /sys/kernel/debug/$FAILTYPE/inject

    rmdir tmpmnt
    losetup -d "$DEVICE"
    rm testfile.img

fail_io_timeout使用示例

  1. 先配置/sys/kernel/debug/fail_io_timeout故障参数。

    # 设置故障出现的可能性为100%
    root@raspberrypi:/sys/kernel/debug/fail_io_timeout# echo 100 > probability 
    
    # 没有错误次数上限
    root@raspberrypi:/sys/kernel/debug/fail_io_timeout# echo -1 > times 
    
    # 设置故障注入间隔为 10(单位是 should_fail() 的调用次数,而不是 ms)
    root@raspberrypi:/sys/kernel/debug/fail_io_timeout# echo 10 > interval 
    
  2. 使能故障

    root@raspberrypi:/sys/kernel/debug/fail_io_timeout# echo 1 > /sys/block/sdb/io-timeout-fail
    
    # 使用dd命令触发I/O流程
    root@runninglinuxkernel:~# dd if=/dev/sdb of=./test.img bs=1M count=100 iflag=direct
    [ 1430.673613] FAULT_INJECTION: forcing a failure.
                   name fail_io_timeout, interval 10, probability 100, space 0, times -1
    [ 1430.673660] CPU: 2 PID: 107 Comm: usb-storage Tainted: G         C         6.1.35-v8 #3
    [ 1430.673676] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT)
    [ 1430.673685] Call trace:
    [ 1430.673691]  dump_backtrace+0xfc/0x108
    [ 1430.673711]  show_stack+0x20/0x30
    [ 1430.673722]  dump_stack_lvl+0x8c/0xb8
    [ 1430.673740]  dump_stack+0x18/0x34
    [ 1430.673753]  should_fail_ex+0x1e4/0x238
    [ 1430.673770]  should_fail+0x14/0x20
    [ 1430.673784]  __blk_should_fake_timeout+0x24/0x30
    [ 1430.673804]  scsi_done_internal+0x13c/0x160
    [ 1430.673822]  scsi_done_direct+0x1c/0x28
    [ 1430.673837]  usb_stor_control_thread+0x274/0x2b0
    [ 1430.673855]  kthread+0x100/0x118
    [ 1430.673871]  ret_from_fork+0x10/0x20
    [ 1461.268398] VFS: busy inodes on changed media sdb
    [ 1461.269517] sd 1:0:0:0: [sdb] 61440000 512-byte logical blocks: (31.5 GB/29.3 GiB)
    [ 1461.290253] FAULT_INJECTION: forcing a failure.
                   name fail_io_timeout, interval 10, probability 100, space 0, times -1 <-- 设置的故障参数
    [ 1461.290298] CPU: 0 PID: 107 Comm: usb-storage Tainted: G         C         6.1.35-v8 #3
    [ 1461.290315] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT)
    [ 1461.290325] Call trace:	<-- 函数调用栈
    [ 1461.290331]  dump_backtrace+0xfc/0x108
    [ 1461.290352]  show_stack+0x20/0x30
    [ 1461.290364]  dump_stack_lvl+0x8c/0xb8
    [ 1461.290381]  dump_stack+0x18/0x34
    [ 1461.290393]  should_fail_ex+0x1e4/0x238
    [ 1461.290411]  should_fail+0x14/0x20
    [ 1461.290425]  __blk_should_fake_timeout+0x24/0x30
    [ 1461.290445]  scsi_done_internal+0x13c/0x160
    [ 1461.290462]  scsi_done_direct+0x1c/0x28
    [ 1461.290477]  usb_stor_control_thread+0x274/0x2b0
    [ 1461.290496]  kthread+0x100/0x118
    [ 1461.290511]  ret_from_fork+0x10/0x20
    [ 1491.982600] sd 1:0:0:0: [sdb] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x03 driverbyte=DRIVER_OK cmd_age=30s
    [ 1491.982642] sd 1:0:0:0: [sdb] tag#0 CDB: opcode=0x28 28 00 03 a9 7f f9 00 00 01 00
    [ 1491.982661] I/O error, dev sdb, sector 61439993 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2
    [ 1491.986407] FAULT_INJECTION: forcing a failure.
                   name fail_io_timeout, interval 10, probability 100, space 0, times -1
    [ 1491.986456] CPU: 0 PID: 107 Comm: usb-storage Tainted: G         C         6.1.35-v8 #3
    [ 1491.986475] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT)
    [ 1491.986485] Call trace:
    [ 1491.986491]  dump_backtrace+0xfc/0x108
    [ 1491.986513]  show_stack+0x20/0x30
    [ 1491.986524]  dump_stack_lvl+0x8c/0xb8
    [ 1491.986542]  dump_stack+0x18/0x34
    [ 1491.986555]  should_fail_ex+0x1e4/0x238
    [ 1491.986574]  should_fail+0x14/0x20
    [ 1491.986588]  __blk_should_fake_timeout+0x24/0x30
    [ 1491.986609]  scsi_done_internal+0x13c/0x160
    [ 1491.986626]  scsi_done_direct+0x1c/0x28
    [ 1491.986641]  usb_stor_control_thread+0x274/0x2b0
    [ 1491.986661]  kthread+0x100/0x118
    [ 1491.986676]  ret_from_fork+0x10/0x20
    
    # 数据拷贝进程进入D状态
    curtis@raspberrypi:~ $ ps aux | grep dd
    root        4149  0.0  0.0   6016  2508 pts/0    D+   00:38   0:00 dd if=/dev/sdb of=./test.img bs=1M count=100 iflag=direct
    curtis      4325  0.0  0.0   6044   636 pts/1    S+   00:40   0:00 grep --color=auto dd
    curtis@raspberrypi:~ $
    

Tool to run command with failslab or fail_page_alloc

In order to make it easier to accomplish the tasks mentioned above, we can use tools/testing/fault-injection/failcmd.sh. Please run a command “./tools/testing/fault-injection/failcmd.sh --help” for more information and see the following examples.

#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# NAME
#	failcmd.sh - run a command with injecting slab/page allocation failures
#
# SYNOPSIS
#	failcmd.sh --help
#	failcmd.sh [<options>] command [arguments]
#
# DESCRIPTION
#	Run command with injecting slab/page allocation failures by fault
#	injection.
#
#	NOTE: you need to run this script as root.
#

# Print the help text (synopsis, options, environment) on stderr.
# $0 is expanded inside the here-document, so the first line shows the name
# the script was invoked with.
usage() {
	cat <<EOF >&2
Usage: $0 [options] command [arguments]

OPTIONS
	-p percent
	--probability=percent
		likelihood of failure injection, in percent.
		Default value is 1

	-t value
	--times=value
		specifies how many times failures may happen at most.
		Default value is 1

	--oom-kill-allocating-task=value
		set /proc/sys/vm/oom_kill_allocating_task to specified value
		before running the command.
		Default value is 1

	-h, --help
		Display a usage message and exit

	--interval=value, --space=value, --verbose=value, --task-filter=value,
	--stacktrace-depth=value, --require-start=value, --require-end=value,
	--reject-start=value, --reject-end=value, --ignore-gfp-wait=value
		See Documentation/fault-injection/fault-injection.rst for more
		information

	failslab options:
	--cache-filter=value

	fail_page_alloc options:
	--ignore-gfp-highmem=value, --min-order=value

ENVIRONMENT
	FAILCMD_TYPE
		The following values for FAILCMD_TYPE are recognized:

		failslab
			inject slab allocation failures
		fail_page_alloc
			inject page allocation failures

		If FAILCMD_TYPE is not defined, then failslab is used.
EOF
}

# This script must be run as root.
if [ "$UID" != 0 ]; then
	echo must be run as root >&2
	exit 1
fi

# debugfs mount point: third field of the first line of `mount -t debugfs`.
DEBUGFS=$(mount -t debugfs | head -1 | awk '{ print $3}')

if [ ! -d "$DEBUGFS" ]; then
	echo debugfs is not mounted >&2
	exit 1
fi

# The fault type comes from the environment; failslab is the default.
FAILCMD_TYPE=${FAILCMD_TYPE:-failslab}
FAULTATTR=$DEBUGFS/$FAILCMD_TYPE

# Quoted so that a value containing whitespace is reported as unavailable
# instead of making `[` fail with a syntax error and the check being skipped.
if [ ! -d "$FAULTATTR" ]; then
	echo "$FAILCMD_TYPE is not available" >&2
	exit 1
fi

# Long options accepted for every fault type ...
LONGOPTS=probability:,interval:,times:,space:,verbose:,task-filter:
LONGOPTS=$LONGOPTS,stacktrace-depth:,require-start:,require-end:
LONGOPTS=$LONGOPTS,reject-start:,reject-end:,oom-kill-allocating-task:,help

# ... plus the ones specific to the selected fault type.
if [ "$FAILCMD_TYPE" = failslab ]; then
	LONGOPTS=$LONGOPTS,ignore-gfp-wait:,cache-filter:
elif [ "$FAILCMD_TYPE" = fail_page_alloc ]; then
	LONGOPTS=$LONGOPTS,ignore-gfp-wait:,ignore-gfp-highmem:,min-order:
fi

TEMP=$(getopt -o p:i:t:s:v:h --long "$LONGOPTS" -n 'failcmd.sh' -- "$@")

# $? is still the exit status of getopt here (plain assignment).
if [ $? != 0 ]; then
	usage
	exit 1
fi

# Replace the positional parameters with getopt's normalized, shell-quoted
# output; eval is needed to interpret that quoting.
eval set -- "$TEMP"

# Reset the fault attributes this script manages to their idle values:
# task-filter=N, probability=0 (no injection), times=1.
#
# Globals: FAULTATTR (read) - directory containing the attribute files
fault_attr_default()
{
	# Redirect targets are quoted: an unquoted path containing whitespace
	# makes bash abort the redirection with "ambiguous redirect".
	echo N > "$FAULTATTR/task-filter"
	echo 0 > "$FAULTATTR/probability"
	echo 1 > "$FAULTATTR/times"
}

# Start from the idle attribute values before any option is applied.
fault_attr_default

# Keep the current sysctl value so it can be written back on exit.
oom_kill_allocating_task_saved=$(cat /proc/sys/vm/oom_kill_allocating_task)

# Undo what this script changed: reset the fault attributes and write the
# saved oom_kill_allocating_task value back to /proc.
restore_values() {
	fault_attr_default
	echo $oom_kill_allocating_task_saved > /proc/sys/vm/oom_kill_allocating_task
}

# Defaults for the options that are applied just before the command runs.
# The numeric ones carry the integer attribute (-i); task_filter is a string.
declare -i oom_kill_allocating_task=1 probability=1 times=1
declare task_filter=Y

# Consume the option words up to and including "--", leaving only the
# command and its arguments in "$@".
#
# probability, times, task-filter and oom-kill-allocating-task are only
# recorded in variables here; every other option is written straight to its
# attribute file under $FAULTATTR.
while true; do
	case "$1" in
	-p|--probability)
		probability=$2
		shift 2
		;;
	-i|--interval)
		echo "$2" > "$FAULTATTR/interval"
		shift 2
		;;
	-t|--times)
		times=$2
		shift 2
		;;
	-s|--space)
		echo "$2" > "$FAULTATTR/space"
		shift 2
		;;
	-v|--verbose)
		echo "$2" > "$FAULTATTR/verbose"
		shift 2
		;;
	--task-filter)
		task_filter=$2
		shift 2
		;;
	--oom-kill-allocating-task)
		oom_kill_allocating_task=$2
		shift 2
		;;
	--stacktrace-depth|--require-start|--require-end|--reject-start|\
	--reject-end|--ignore-gfp-wait|--ignore-gfp-highmem|--min-order)
		# For these options the attribute file is named exactly like
		# the option without its leading "--".
		echo "$2" > "$FAULTATTR/${1#--}"
		shift 2
		;;
	--cache-filter)
		# NOTE(review): the file name uses an underscore while every
		# other attribute uses hyphens - confirm against the kernel's
		# failslab debugfs entries.
		echo "$2" > "$FAULTATTR/cache_filter"
		shift 2
		;;
	-h|--help)
		usage
		exit 0
		;;
	--)
		shift
		break
		;;
	*)
		# Without this arm an unexpected word would never be shifted
		# and the loop would spin forever.
		echo "failcmd.sh: unexpected argument '$1'" >&2
		exit 1
		;;
	esac
done

# No command given: there is nothing to run.
[ -z "$1" ] && exit 0

# Install the restore handler before touching any value, so an interrupt
# between the writes below still puts everything back.
trap "restore_values" SIGINT SIGTERM EXIT

echo "$oom_kill_allocating_task" > /proc/sys/vm/oom_kill_allocating_task
echo "$task_filter" > "$FAULTATTR/task-filter"
echo "$probability" > "$FAULTATTR/probability"
echo "$times" > "$FAULTATTR/times"

# The child bash marks itself via /proc/self/make-it-fail and then exec's the
# command in that same process. The command line is passed as positional
# parameters rather than pasted into the script text, so arguments containing
# whitespace or shell metacharacters reach the command unchanged.
bash -c 'echo 1 > /proc/self/make-it-fail && exec "$@"' failcmd.sh "$@"

Examples:

Run a command “make -C tools/testing/selftests/ run_tests” with injecting slab
allocation failure::

# ./tools/testing/fault-injection/failcmd.sh \
	-- make -C tools/testing/selftests/ run_tests

Same as above except to specify 100 times failures at most instead of one time
at most by default::

# ./tools/testing/fault-injection/failcmd.sh --times=100 \
	-- make -C tools/testing/selftests/ run_tests

Same as above except to inject page allocation failure instead of slab
allocation failure::

# env FAILCMD_TYPE=fail_page_alloc \
	./tools/testing/fault-injection/failcmd.sh --times=100 \
	-- make -C tools/testing/selftests/ run_tests

Systematic faults using fail-nth

The following code systematically faults 0-th, 1-st, 2-nd and so on capabilities in the socketpair() system call::

  #include <stdlib.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <sys/socket.h>
  #include <sys/syscall.h>
  #include <fcntl.h>
  #include <errno.h>

  /*
   * Systematically fail the 1st, 2nd, 3rd, ... injectable call made while
   * this thread executes socketpair(), using the thread's fail-nth file.
   * One line is printed per attempt; the loop ends at the first attempt in
   * which no fault was injected.
   */
  int main()
  {
	int i, err, res, fail_nth, fds[2];
	char buf[128];
	ssize_t len;

	system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
	sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
	fail_nth = open(buf, O_RDWR);
	if (fail_nth < 0) {
		/* Without the fail-nth file nothing below can work. */
		perror(buf);
		return 1;
	}
	for (i = 1;; i++) {
		/* Arm: ask for the i-th call to fail. */
		sprintf(buf, "%d", i);
		write(fail_nth, buf, strlen(buf));
		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
		err = errno;
		/* Read back: "0" means the fault was injected. */
		len = pread(fail_nth, buf, sizeof(buf) - 1, 0);
		if (len >= 0)
			buf[len] = '\0';
		if (res == 0) {
			close(fds[0]);
			close(fds[1]);
		}
		printf("%d-th fault %c: res=%d/%d\n", i, atoi(buf) ? 'N' : 'Y',
			res, err);
		/* Non-zero: the i-th call was never reached, so we are done. */
		if (atoi(buf))
			break;
	}
	close(fail_nth);
	return 0;
  }

An example output::

	1-th fault Y: res=-1/23
	2-th fault Y: res=-1/23
	3-th fault Y: res=-1/12
	4-th fault Y: res=-1/12
	5-th fault Y: res=-1/23
	6-th fault Y: res=-1/23
	7-th fault Y: res=-1/23
	8-th fault Y: res=-1/12
	9-th fault Y: res=-1/12
	10-th fault Y: res=-1/12
	11-th fault Y: res=-1/12
	12-th fault Y: res=-1/12
	13-th fault Y: res=-1/12
	14-th fault Y: res=-1/12
	15-th fault Y: res=-1/12
	16-th fault N: res=0/12

后续使用的时候不断完善。

你可能感兴趣的:(Linux,内核,linux,运维,服务器)