linux kernel 内存踩踏之KASAN(一)

一、背景

linux 内核出现内存类问题时,我们常用的调试工具就是kasan,kasan有三种模式:

1. Generic KASAN (这个就是我们最常用的,用 1 个 shadow byte 表示 8 个 byte 的可访问状态, 对标用户层 asan)

2. Software Tag-Based KASAN (这个可以对标用户层 hwasan,仅64位生效)

3. Hardware Tag-Based KASAN (大名鼎鼎的MTE, 也是arm64 且硬件平台需要额外支持)

后面将对比三种不同的kasan 使用,实现原理以及使用案例

二、KASAN使能相关配置(Generic版本)

kasan相关config是否打开

/dev # zcat /proc/config.gz | grep -i kasan
CONFIG_KASAN_SHADOW_OFFSET=0xdfff800000000000   //这个offset是怎么来的、有什么含义?下一节描述
CONFIG_DRIVER_KASAN_TEST=m
CONFIG_HAVE_ARCH_KASAN=y
CONFIG_HAVE_ARCH_KASAN_SW_TAGS=y
CONFIG_HAVE_ARCH_KASAN_HW_TAGS=y
CONFIG_HAVE_ARCH_KASAN_VMALLOC=y
CONFIG_CC_HAS_KASAN_GENERIC=y
CONFIG_CC_HAS_KASAN_SW_TAGS=y
CONFIG_KASAN=y
CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y
CONFIG_KASAN_GENERIC=y    //标准版本kasan
# CONFIG_KASAN_SW_TAGS is not set
# CONFIG_KASAN_HW_TAGS is not set
CONFIG_KASAN_OUTLINE=y
# CONFIG_KASAN_INLINE is not set
CONFIG_KASAN_STACK=y      //stack kasan检测,如局部变量,局部数组等操作引起的内存踩踏
CONFIG_KASAN_VMALLOC=y    //vmalloc kasan检测,使用vmalloc申请内存的内存踩踏

这里使用的是普通版本 Generic KASAN

三、kasan基本原理

linux kernel 内存踩踏之KASAN(一)_第1张图片

shadow byte 的值:

1~7 表示该8byte中只有前N个byte可访问(N为shadow byte的值)

0表示8byte均可使用

其他值表示无法使用,常见的shadow byte值可以看mm/kasan/kasan.h定义:

#ifdef CONFIG_KASAN_GENERIC
#define KASAN_PAGE_FREE		0xFF  /* freed page */
#define KASAN_PAGE_REDZONE	0xFE  /* redzone for kmalloc_large allocation */
#define KASAN_SLAB_REDZONE	0xFC  /* redzone for slab object */
#define KASAN_SLAB_FREE		0xFB  /* freed slab object */
#define KASAN_VMALLOC_INVALID	0xF8  /* inaccessible space in vmap area */

#define KASAN_SLAB_FREETRACK	0xFA  /* freed slab object with free track */
#define KASAN_GLOBAL_REDZONE	0xF9  /* redzone for global variable */

/* Stack redzone shadow values. Compiler ABI, do not change. */
#define KASAN_STACK_LEFT	0xF1
#define KASAN_STACK_MID		0xF2
#define KASAN_STACK_RIGHT	0xF3
#define KASAN_STACK_PARTIAL	0xF4

/* alloca redzone shadow values. */
#define KASAN_ALLOCA_LEFT	0xCA
#define KASAN_ALLOCA_RIGHT	0xCB

下图是arm64 48位虚拟地址、pagesize 4K 时的内存映射图,KASAN shadow 区域占 32TB,正好是 256TB 内核虚拟地址空间的 1/8,可以覆盖整个内核空间。

linux kernel 内存踩踏之KASAN(一)_第2张图片

前面一节遗留一个问题,CONFIG_KASAN_SHADOW_OFFSET=0xdfff800000000000

这个shadowoffset 是用来看什么的?这个实际是根据我们内核虚拟地址位数,kasan模式计算而来的

config KASAN_SHADOW_OFFSET
    hex  
    depends on KASAN_GENERIC || KASAN_SW_TAGS
    default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
    default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
    default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
    default 0xdfffffc000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS
    default 0xdffffff800000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS
    default 0xefff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS
    default 0xefffc00000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS
    default 0xeffffe0000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS
    default 0xefffffc000000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS
    default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
    default 0xffffffffffffffff

计算方法是:

CONFIG_KASAN_SHADOW_OFFSET = KASAN_SHADOW_START - (KERNEL_ADDR_START >> 3)

= 0xffff600000000000 - ( 0xffff000000000000 >> 3) = 0xdfff800000000000

有了这个kasan_shadow_offset, 后面我们需要获取一个内核地址对应的shadow 位置,只需要通过公式:

(kernel_addr >> 3) + CONFIG_KASAN_SHADOW_OFFSET = kernel_addr对应的shadow_addr

四、利用 test driver程序验证

下面是一个简易的测试用例,用来测试kmalloc、page、全局变量、stack变量和vmalloc的内存踩踏

/*
 * NOTE: the original header names were lost when this listing was extracted
 * (the <...> part was stripped). The list below is reconstructed from the
 * kernel symbols the driver actually uses.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/uaccess.h>


#include "../../../mm/kasan/kasan.h"

/*
 * Two-element global array that global_oob_left() reads out of bounds.
 * Note: 007 is an octal literal, i.e. the value 7.
 */
int global_kasan_value[2] = {996, 007};

/* Small object type used for the module-lifetime allocation held in gptr. */
struct kasan_test_type {
    int type;
};

/* Allocated with kzalloc() in kasan_start(); not otherwise read in this file. */
static struct kasan_test_type *gptr = NULL;

/*
 * Test selector values: the integer written to the misc device is compared
 * against these constants in kasan_test_case().
 *
 * Note: alloca_out_of_bounds has no handler in the dispatch switch, so
 * selecting it falls through to the "undef error type" default branch.
 */
enum kasan_test_case{
    slab_out_of_bounds = 0,     /* kmalloc object, write past the end */
    page_out_of_bounds = 1,     /* page allocation, read past the end */
    global_out_of_bounds = 2,   /* global array, read past the end */
    stack_out_of_bounds = 3,    /* local array, read past the end */
    use_after_free = 4,         /* page allocation, read after free */
    vmalloc_out_of_bounds = 5,  /* vmalloc area, read past the end */
    alloca_out_of_bounds = 6,   /* declared but not implemented */
};

/*
 * kmalloc_oob_right - deliberately write past the end of a kmalloc() object.
 * @size:         number of bytes to allocate.
 * @write_offset: distance of the write from the last valid byte
 *                (index size - 1); any value > 0 lands outside the object.
 *
 * The out-of-bounds store is intentional: it is the access KASAN is expected
 * to report. Only the allocation-failure path is guarded, so that a failed
 * kmalloc() does not turn the test into a NULL-pointer dereference.
 */
static void kmalloc_oob_right(size_t size, int write_offset)
{
    char *ptr;

    ptr = kmalloc(size, GFP_KERNEL);
    if (!ptr) {
        pr_err("%s: kmalloc(%zu) failed\n", __func__, size);
        return;
    }
    pr_info("%s %llx\n", __func__, (unsigned long long)ptr);

    /* Intentional out-of-bounds write. */
    ptr[size - 1 + write_offset] = 'y';

    kfree(ptr);
}

/*
 * global_oob_left - read one element past the end of global_kasan_value.
 *
 * Despite the "left" in the name, index 2 is beyond the right end of the
 * two-element array. The out-of-bounds read is intentional.
 */
static void global_oob_left(void)
{
	pr_info("global arr oob access %d\n", global_kasan_value[2]);
}

/*
 * pagealloc_oob_right - read the first byte past a page allocation.
 * @order: allocation order passed to alloc_pages(); the block spans
 *         1 << (PAGE_SHIFT + order) bytes.
 *
 * The read of ptr[size] is the intentional out-of-bounds access. The
 * allocation result is checked so that a failed alloc_pages() does not
 * become a NULL-pointer dereference instead of the intended test.
 */
static void pagealloc_oob_right(size_t order)
{
    char *ptr;
    struct page *pages;
    size_t size = (1UL << (PAGE_SHIFT + order));

    pages = alloc_pages(GFP_KERNEL, order);
    if (!pages) {
        pr_err("%s: alloc_pages(order %zu) failed\n", __func__, order);
        return;
    }

    ptr = page_address(pages);
    pr_info("%s %llx\n", __func__, (unsigned long long)ptr);

    /* Intentional out-of-bounds read: ptr[size] is one byte past the block. */
    ptr[0] = ptr[size];

    free_pages((unsigned long)ptr, order);
}

/*
 * pagealloc_uaf - read from a page allocation after it has been freed.
 * @order: allocation order passed to alloc_pages().
 *
 * The read of ptr[0] after free_pages() is the intentional use-after-free.
 * The allocation result is checked so a failed alloc_pages() does not turn
 * into a NULL-pointer dereference.
 */
static void pagealloc_uaf(size_t order)
{
	char *ptr;
	struct page *pages;

	pages = alloc_pages(GFP_KERNEL, order);
	if (!pages) {
		pr_err("%s: alloc_pages(order %zu) failed\n", __func__, order);
		return;
	}

	ptr = page_address(pages);
	/* Terminate the record with '\n' so it is not left as an open line. */
	pr_info("%s %llx\n", __func__, (unsigned long long)ptr);
	free_pages((unsigned long)ptr, order);

	/* Intentional use-after-free read. */
	pr_info("%s %d\n", __func__, ptr[0]);
}

/*
 * vmalloc_oob - read the first byte past a vmalloc() allocation.
 * @size: number of bytes to allocate.
 *
 * Touches the first and last valid bytes, then deliberately reads
 * v_ptr[size]. The allocation result is checked so that a failed vmalloc()
 * does not become a NULL-pointer dereference.
 */
static void vmalloc_oob(size_t size)
{
    char *v_ptr;

    v_ptr = vmalloc(size);
    if (!v_ptr) {
        pr_err("%s: vmalloc(%zu) failed\n", __func__, size);
        return;
    }

    OPTIMIZER_HIDE_VAR(v_ptr);

    /* Terminate the record with '\n' so it is not left as an open line. */
    pr_info("%s %llx\n", __func__, (unsigned long long)v_ptr);

    /* Make sure in-bounds accesses are valid. */
    v_ptr[0] = 0;
    v_ptr[size - 1] = 0;

    /* Intentional out-of-bounds read. */
    pr_info("%s %d\n", __func__, v_ptr[size]);

    vfree(v_ptr);
}

/*
 * kasan_stack_oob - read past the end of a local array.
 *
 * Reads index ARRAY_SIZE(stack_array) + 4 of the 10-byte array, i.e. byte
 * offset 14 from its start, and prints the value. The out-of-bounds read is
 * intentional.
 */
static void kasan_stack_oob(void)
{
	char stack_array[10];
	/*
	 * The array is accessed through a volatile pointer rather than directly.
	 * NOTE(review): the original comment referred to kasan_global_oob_right,
	 * which is not defined in this file; presumably the volatile pointer is
	 * there to keep the compiler from seeing the array bounds - confirm.
	 */
	char *volatile array = stack_array;
	char *p = &array[ARRAY_SIZE(stack_array) + 4];
	pr_info("%s  %d\n", __func__, *p);
}

/*
 * kasan_test_case - run the memory-corruption test selected by @type.
 * @type: one of the enum kasan_test_case selector values; anything without
 *        a handler is logged as an undefined type.
 */
static void kasan_test_case(int type)
{
    /*
     * Enable multi-shot reporting for the duration of the test (by default
     * only the first report is printed); the previous setting is restored
     * below.
     */
    bool saved_multi_shot = kasan_save_enable_multi_shot();

    if (type == slab_out_of_bounds) {
        /* alloc 128 bytes and write at offset 2 from the last valid byte */
        kmalloc_oob_right(128, 2);
    } else if (type == page_out_of_bounds) {
        pagealloc_oob_right(0);
    } else if (type == global_out_of_bounds) {
        global_oob_left();
    } else if (type == stack_out_of_bounds) {
        kasan_stack_oob();
    } else if (type == use_after_free) {
        pagealloc_uaf(0);
    } else if (type == vmalloc_out_of_bounds) {
        vmalloc_oob(2048);
    } else {
        pr_info("undef error type %d\n", type);
    }

    kasan_restore_multi_shot(saved_multi_shot);
    pr_info("%s type %d\n", __func__, type);
}

/*
 * kasan_testcase_write - write() handler of the kasan_test misc device.
 * @filp: file being written (unused).
 * @buf:  user buffer holding the test-case number as text.
 * @len:  number of bytes in @buf.
 * @off:  file offset (unused).
 *
 * Copies the user text into a temporary kernel buffer, parses it as an
 * integer (base auto-detected by simple_strtoul) and runs that test case.
 *
 * Return: @len on success, -ENOMEM if the temporary buffer cannot be
 * allocated, -EFAULT if the user buffer cannot be copied.
 */
static ssize_t kasan_testcase_write(struct file *filp, const char __user *buf,
                   size_t len, loff_t *off)
{
    char *kbuf;
    int ntcase;
    ssize_t ret = len;

    kbuf = kmalloc(len + 1, GFP_KERNEL);
    if (!kbuf)
        return -ENOMEM;

    if (copy_from_user(kbuf, buf, len) != 0) {
        pr_info("copy the buff failed \n");
        ret = -EFAULT;
        goto done;
    }
    /* The extra byte is for the terminator simple_strtoul() relies on. */
    kbuf[len] = '\0';

    ntcase = simple_strtoul(kbuf, NULL, 0);

    kasan_test_case(ntcase);
done:
    /* The buffer is only needed for parsing; release it on every path. */
    kfree(kbuf);
    return ret;
}

static struct file_operations kasan_fops = {
    .owner  =   THIS_MODULE,
    .write  =   kasan_testcase_write,
    .llseek =   noop_llseek,
};

/*
 * Misc device descriptor: registered under the name "kasan_test" with a
 * dynamically assigned minor number, using kasan_fops for file operations.
 */
static struct miscdevice kasan_misc = {
    .minor  = MISC_DYNAMIC_MINOR,
    .name   = "kasan_test",
    .fops   = &kasan_fops,
};

/*
 * kasan_start - module init: allocate gptr and register the misc device.
 *
 * The allocation is done first so that a failure never leaves a registered
 * device behind; if registration fails, the allocation is released again.
 *
 * Return: 0 on success, -ENOMEM if the allocation fails, or the negative
 * error code returned by misc_register().
 */
static int __init kasan_start(void)
{
    int ret;

    gptr = kzalloc(sizeof(*gptr), GFP_KERNEL);
    if (!gptr)
        return -ENOMEM;

    ret = misc_register(&kasan_misc);
    if (ret < 0) {
        pr_err(" kasan test register failed %d\n", ret);
        kfree(gptr);
        gptr = NULL;
        return ret;
    }

    printk(KERN_INFO "kasan test register\n");
    return 0;
}

/*
 * kasan_end - module exit: unregister the misc device and release gptr.
 *
 * kfree() accepts NULL, so this is safe even if gptr was never allocated.
 */
static void __exit kasan_end(void)
{
    misc_deregister(&kasan_misc);

    kfree(gptr);
    gptr = NULL;
}

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("geek");
MODULE_DESCRIPTION("A simple kasan test driver!");
MODULE_VERSION("0.1");
 
/* Entry and exit points: kasan_start() on load, kasan_end() on unload. */
module_init(kasan_start);
module_exit(kasan_end);

五、内存踩踏testcase调试

触发kmalloc的out of bound访问

/dev # echo 0 > /dev/kasan_test 
[ 4063.037612] kmalloc_oob_right ffff000006e57400
[ 4063.065278] ==================================================================
[ 4063.073081] BUG: KASAN: slab-out-of-bounds in kasan_testcase_write+0x170/0x4d8 [kasan_driver]
[ 4063.075812] Write of size 1 at addr ffff000006e57481 by task sh/179
[ 4063.076529] 
[ 4063.077151] CPU: 5 PID: 179 Comm: sh Tainted: G    B            N 6.6.1-g3cba94c761ec-dirty #15
[ 4063.077902] Hardware name: linux,dummy-virt (DT)
[ 4063.078538] Call trace:
[ 4063.078926]  dump_backtrace+0x90/0xe8
[ 4063.079771]  show_stack+0x18/0x24
[ 4063.079971]  dump_stack_lvl+0x48/0x60
[ 4063.080175]  print_report+0xf8/0x5d8
[ 4063.080372]  kasan_report+0xc4/0x108
[ 4063.080566]  __asan_store1+0x60/0x6c
[ 4063.080783]  kasan_testcase_write+0x170/0x4d8 [kasan_driver]
[ 4063.081141]  vfs_write+0x158/0x45c
[ 4063.081492]  ksys_write+0xd0/0x180
[ 4063.081835]  __arm64_sys_write+0x44/0x58
[ 4063.082188]  invoke_syscall+0x60/0x184
[ 4063.082550]  el0_svc_common.constprop.0+0x78/0x13c
[ 4063.082955]  do_el0_svc+0x30/0x40
[ 4063.083179]  el0_svc+0x38/0x70
[ 4063.083351]  el0t_64_sync_handler+0x120/0x12c
[ 4063.083553]  el0t_64_sync+0x190/0x194
[ 4063.083853] 
[ 4063.083982] Allocated by task 179:
[ 4063.084229]  kasan_save_stack+0x3c/0x64
[ 4063.084559]  kasan_set_track+0x2c/0x40
[ 4063.084757]  kasan_save_alloc_info+0x24/0x34
[ 4063.084975]  __kasan_kmalloc+0xb8/0xbc
[ 4063.085230]  kmalloc_trace+0x48/0x5c
[ 4063.085438]  kasan_testcase_write+0x154/0x4d8 [kasan_driver]
[ 4063.085758]  vfs_write+0x158/0x45c
[ 4063.085965]  ksys_write+0xd0/0x180
[ 4063.086155]  __arm64_sys_write+0x44/0x58
[ 4063.086355]  invoke_syscall+0x60/0x184
[ 4063.086556]  el0_svc_common.constprop.0+0x78/0x13c
[ 4063.086790]  do_el0_svc+0x30/0x40
[ 4063.086984]  el0_svc+0x38/0x70
[ 4063.087168]  el0t_64_sync_handler+0x120/0x12c
[ 4063.087385]  el0t_64_sync+0x190/0x194
[ 4063.087600] 
[ 4063.087749] The buggy address belongs to the object at ffff000006e57400
[ 4063.087749]  which belongs to the cache kmalloc-128 of size 128
[ 4063.088269] The buggy address is located 1 bytes to the right of
[ 4063.088269]  allocated 128-byte region [ffff000006e57400, ffff000006e57480)
[ 4063.088708] 
[ 4063.088928] The buggy address belongs to the physical page:
[ 4063.089384] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x46e56
[ 4063.089990] head:(____ptrval____) order:1 entire_mapcount:0 nr_pages_mapped:0 pincount:0
[ 4063.090330] flags: 0x3fffc0000000840(slab|head|node=0|zone=0|lastcpupid=0xffff)
[ 4063.090960] page_type: 0xffffffff()
[ 4063.091467] raw: 03fffc0000000840 ffff0000060028c0 dead000000000122 0000000000000000
[ 4063.091776] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
[ 4063.092095] page dumped because: kasan: bad access detected
[ 4063.092322] 
[ 4063.092443] Memory state around the buggy address:
[ 4063.092785]  ffff000006e57380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 4063.093109]  ffff000006e57400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 4063.093404] >ffff000006e57480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 4063.093690]                    ^
[ 4063.093906]  ffff000006e57500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 4063.094188]  ffff000006e57580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 4063.094470] ==================================================================
[ 4063.096441] kasan_test_case type 0

对应代码

static void kmalloc_oob_right(size_t size, int write_offset)
{
    char *ptr;

    ptr = kmalloc(size, GFP_KERNEL);
    pr_info("%s %llx\n", __func__, (unsigned long long)ptr);

    ptr[size - 1 + write_offset] = 'y';  //触发越界访问
...... 

对应汇编代码

   0xffff80007dbf0174 <+336>:	bl	0xffff800080321be4   //1.这里完成分配内存的shadow标记
   0xffff80007dbf0178 <+340>:	mov	x2, x0
   0xffff80007dbf017c <+344>:	add	x1, x22, #0x80
   0xffff80007dbf0180 <+348>:	mov	x24, x0
   0xffff80007dbf0184 <+352>:	add	x0, x22, #0xc0
   0xffff80007dbf0188 <+356>:	bl	0xffff800080154834 <_printk>
   0xffff80007dbf018c <+360>:	add	x0, x24, #0x81
   0xffff80007dbf0190 <+364>:	bl	0xffff8000803b70d8 <__asan_store1>  //2.这里来检查内存访问是否合法
   0xffff80007dbf0194 <+368>:	mov	w1, #0x79                  	// #121
   0xffff80007dbf0198 <+372>:	strb	w1, [x24, #129]
   0xffff80007dbf019c <+376>:	mov	x0, x24
   0xffff80007dbf01a0 <+380>:	bl	0xffff800080322a8c 

这里实际分成两步:

a、在kmalloc时设置shadow标记;

b、在访问内存时,把待访问的地址和访问长度交给kasan检查函数,对照shadow标记确认本次访问是否合法

5.1.kmalloc时设置tag 标记分析:

__kmalloc
-->kmalloc_slab
-->__kasan_kmalloc 
    -->kasan_poison_last_granule
    -->kasan_poison

Dump of assembler code for function kasan_poison_last_granule:
   0xffff8000803b89ec <+0>:	ands	x2, x1, #0x7
   0xffff8000803b89f0 <+4>:	b.eq	0xffff8000803b8a08   // b.none
   0xffff8000803b89f4 <+8>:	add	x0, x0, x1
   0xffff8000803b89f8 <+12>:	mov	x1, #0x800000000000        	// 熟悉的0xdfff800000000000
   0xffff8000803b89fc <+16>:	movk	x1, #0xdfff, lsl #48
   0xffff8000803b8a00 <+20>:	lsr	x0, x0, #3
   0xffff8000803b8a04 <+24>:	strb	w2, [x0, x1]
   0xffff8000803b8a08 <+28>:	ret

上面的代码先计算 size & 7:如果 size 不是 8 的整数倍,就把这个余数(最后一个 8 字节单元内可访问的字节数)写入 shadow 地址:((ptr + size) >> 3) + kasan_shadow_offset(0xdfff800000000000)

比如上面的kmalloc 128字节,指针值是0xffff000006e57400,我们查看它的shadow标记值,正好shadow值对应16个0(16*8 可用byte) :

0xffff000006e57400对应shadow值

5.2.内存访问时kasan是如何捕获异常

对应汇编代码
   0xffff80007dbf0174 <+336>:	bl	0xffff800080321be4   //1.这里完成分配内存的shadow标记
   0xffff80007dbf0178 <+340>:	mov	x2, x0
   0xffff80007dbf017c <+344>:	add	x1, x22, #0x80
   0xffff80007dbf0180 <+348>:	mov	x24, x0
   0xffff80007dbf0184 <+352>:	add	x0, x22, #0xc0
   0xffff80007dbf0188 <+356>:	bl	0xffff800080154834 <_printk>
   0xffff80007dbf018c <+360>:	add	x0, x24, #0x81         //注意这里 x0 = ptr + 0x81,是待访问的地址(偏移129),不是访问长度;访问长度1字节由 __asan_store1 本身决定
   0xffff80007dbf0190 <+364>:	bl	0xffff8000803b70d8 <__asan_store1>  //2.这里来检查内存访问是否合法
   0xffff80007dbf0194 <+368>:	mov	w1, #0x79                  	// #121
   0xffff80007dbf0198 <+372>:	strb	w1, [x24, #129]
   0xffff80007dbf019c <+376>:	mov	x0, x24
   0xffff80007dbf01a0 <+380>:	bl	0xffff800080322a8c 

__asan_store1实现:
Dump of assembler code for function __asan_store1:
   0xffff8000803b70d8 <+0>:	paciasp
   0xffff8000803b70dc <+4>:	stp	x29, x30, [sp, #-16]!
   0xffff8000803b70e0 <+8>:	xpaclri
   0xffff8000803b70e4 <+12>:	mov	x29, sp
   0xffff8000803b70e8 <+16>:	cmn	x0, #0x1
   0xffff8000803b70ec <+20>:	b.cs	0xffff8000803b7128 <__asan_store1+80>  // b.hs, b.nlast
   0xffff8000803b70f0 <+24>:	mov	x2, #0xfffeffffffffffff    	// #-281474976710657
   0xffff8000803b70f4 <+28>:	cmp	x0, x2
   0xffff8000803b70f8 <+32>:	b.ls	0xffff8000803b7128 <__asan_store1+80>  // b.plast
   0xffff8000803b70fc <+36>:	lsr	x3, x0, #3              // 1.x0指针右移3位后存放在x3
   0xffff8000803b7100 <+40>:	mov	x2, #0x800000000000     // 2.X2 存储kasan_offset 0xdfff800000000000 
   0xffff8000803b7104 <+44>:	movk	x2, #0xdfff, lsl #48
   0xffff8000803b7108 <+48>:	ldrsb	w2, [x3, x2]            // 3.读取x3+x2地址的值,即tag值
   0xffff8000803b710c <+52>:	cbnz	w2, 0xffff8000803b711c <__asan_store1+68>
   0xffff8000803b7110 <+56>:	ldp	x29, x30, [sp], #16
   0xffff8000803b7114 <+60>:	autiasp
   0xffff8000803b7118 <+64>:	ret
   0xffff8000803b711c <+68>:	and	w1, w0, #0x7   //4.取访问地址的低3位,即该地址在所属8字节单元内的偏移
   0xffff8000803b7120 <+72>:	cmp	w2, w1         //5.和shadow值做比较
   0xffff8000803b7124 <+76>:	b.gt	0xffff8000803b7110 <__asan_store1+56>
   0xffff8000803b7128 <+80>:	mov	x3, x30
   0xffff8000803b712c <+84>:	mov	w2, #0x1                   	// #1
   0xffff8000803b7130 <+88>:	mov	x1, #0x1                   	// #1
   0xffff8000803b7134 <+92>:	bl	0xffff8000803b67a0  //6、shadow值 <= 地址在8字节单元内的偏移(含shadow为负值的情况)时,说明访问越界,上报异常
   0xffff8000803b7138 <+96>:	ldp	x29, x30, [sp], #16
   0xffff8000803b713c <+100>:	autiasp
   0xffff8000803b7140 <+104>:	ret

1、传入指针和长度后,指针操作范围计算shadow存放地址

(ptr >> 3) + kasan_offset

2、从shadow存放地址取出shadow值,然后和访问长度比较(转换单byte范围)

3、比如这里测试用例是分配128字节, 访问 ptr[128 - 1 + 2] 即 ptr[129], 对应 (char *) 下标就是0x81

linux kernel 内存踩踏之KASAN(一)_第3张图片

(gdb) x /30b 0xFFFF600000DCAE80
0xffff600000dcae80:	0x00	0x00	0x00	0x00	0x00	0x00	0x00	0x00
0xffff600000dcae88:	0x00	0x00	0x00	0x00	0x00	0x00	0x00	0x00
0xffff600000dcae90:	[0xfc]	0xfc	0xfc	0xfc	0xfc	0xfc	0xfc	0xfc
0xffff600000dcae98:	0xfc	0xfc	0xfc	0xfc	0xfc	0xfc
shadow地址0xffff600000dcae80对应内核地址0xffff000006e57400;
shadow地址0xffff600000dcae90对应内核地址0xffff000006e57400 + 0x81(即0xffff000006e57481所在的8字节单元)。

4、读取的地址0xffff000006e57400 + 0x81:

要求这里的shadow值0~7, 但是实际是0xfc(KASAN_SLAB_REDZONE),所以触发 kasan_report

linux kernel 内存踩踏之KASAN(一)_第4张图片

其他测试用例(由于实现原理类似,不逐一展开分析):

触发page 内存踩踏(越界读到的是相邻的空闲页,其shadow值为0xff(KASAN_PAGE_FREE),所以报告类型显示为 use-after-free)
/dev # echo 1 > kasan_test 
[   47.775781] pagealloc_oob_right ffff000004265000
[   47.776110] ==================================================================
[   47.777583] BUG: KASAN: use-after-free in kasan_testcase_write+0x3e0/0x4d8 [kasan_driver]
[   47.780457] Read of size 1 at addr ffff000004266000 by task sh/179
[   47.781456] 
[   47.782662] CPU: 1 PID: 179 Comm: sh Tainted: G                 N 6.6.1-g3cba94c761ec-dirty #15
[   47.783727] Hardware name: linux,dummy-virt (DT)
[   47.784470] Call trace:
[   47.784783]  dump_backtrace+0x90/0xe8
[   47.785203]  show_stack+0x18/0x24
[   47.785515]  dump_stack_lvl+0x48/0x60
[   47.785785]  print_report+0xf8/0x5d8
[   47.786054]  kasan_report+0xc4/0x108
[   47.786303]  __asan_load1+0x60/0x6c
[   47.786806]  kasan_testcase_write+0x3e0/0x4d8 [kasan_driver]
[   47.787390]  vfs_write+0x158/0x45c
[   47.787656]  ksys_write+0xd0/0x180
[   47.787884]  __arm64_sys_write+0x44/0x58
[   47.788165]  invoke_syscall+0x60/0x184
[   47.788442]  el0_svc_common.constprop.0+0x78/0x13c
[   47.788761]  do_el0_svc+0x30/0x40
[   47.789029]  el0_svc+0x38/0x70
[   47.789214]  el0t_64_sync_handler+0x120/0x12c
[   47.789417]  el0t_64_sync+0x190/0x194
[   47.789708] 
[   47.789900] The buggy address belongs to the physical page:
[   47.790263] page:(____ptrval____) refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x44266
[   47.790795] flags: 0x3fffc0000000000(node=0|zone=0|lastcpupid=0xffff)
[   47.791171] page_type: 0xffffffff()
[   47.791590] raw: 03fffc0000000000 fffffc00001099c8 ffff00006af4d758 0000000000000000
[   47.791876] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[   47.792185] page dumped because: kasan: bad access detected
[   47.792400] 
[   47.792513] Memory state around the buggy address:
[   47.792842]  ffff000004265f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   47.793129]  ffff000004265f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   47.793394] >ffff000004266000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   47.793694]                    ^
[   47.793896]  ffff000004266080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   47.794152]  ffff000004266100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   47.794554] ==================================================================
[   47.795106] Disabling lock debugging due to kernel taint
[   47.795450] kasan_test_case type 1


触发全局变量内存踩踏
/dev # echo 2 > kasan_test 
[   54.484659] ==================================================================
[   54.484983] BUG: KASAN: global-out-of-bounds in kasan_testcase_write+0x2c0/0x4d8 [kasan_driver]
[   54.485402] Read of size 4 at addr ffff80007dbf20a8 by task sh/179
[   54.485638] 
[   54.485772] CPU: 1 PID: 179 Comm: sh Tainted: G    B            N 6.6.1-g3cba94c761ec-dirty #15
[   54.486069] Hardware name: linux,dummy-virt (DT)
[   54.486249] Call trace:
[   54.486380]  dump_backtrace+0x90/0xe8
[   54.486575]  show_stack+0x18/0x24
[   54.486744]  dump_stack_lvl+0x48/0x60
[   54.486930]  print_report+0x318/0x5d8
[   54.487113]  kasan_report+0xc4/0x108
[   54.487293]  __asan_load4+0x9c/0xb8
[   54.487473]  kasan_testcase_write+0x2c0/0x4d8 [kasan_driver]
[   54.487754]  vfs_write+0x158/0x45c
[   54.487937]  ksys_write+0xd0/0x180
[   54.488108]  __arm64_sys_write+0x44/0x58
[   54.488294]  invoke_syscall+0x60/0x184
[   54.488484]  el0_svc_common.constprop.0+0x78/0x13c
[   54.488698]  do_el0_svc+0x30/0x40
[   54.488876]  el0_svc+0x38/0x70
[   54.489044]  el0t_64_sync_handler+0x120/0x12c
[   54.489244]  el0t_64_sync+0x190/0x194
[   54.489431] 
[   54.489583] The buggy address belongs to the variable:
[   54.489776]  global_kasan_value+0x8/0xffffffffffffef60 [kasan_driver]
[   54.490085] 
[   54.490190] Memory state around the buggy address:
[   54.490382]  ffff80007dbf1f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[   54.490637]  ffff80007dbf2000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f9 f9
[   54.490893] >ffff80007dbf2080: f9 f9 f9 f9 00 f9 f9 f9 f9 f9 f9 f9 00 00 00 00
[   54.491166]                                   ^
[   54.491356]  ffff80007dbf2100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   54.491609]  ffff80007dbf2180: 00 00 00 00 00 00 00 00 00 00 00 00 00 f9 f9 f9
[   54.491856] ==================================================================
[   54.492485] global arr oob access 0
[   54.492722] kasan_test_case type 2

触发stack内存踩踏
/dev # echo 3 > kasan_test 
[   75.450592] ==================================================================
[   75.452056] BUG: KASAN: stack-out-of-bounds in kasan_testcase_write+0x414/0x4d8 [kasan_driver]
[   75.454159] Read of size 1 at addr ffff8000873b7b1e by task sh/179
[   75.455514] 
[   75.456157] CPU: 1 PID: 179 Comm: sh Tainted: G    B            N 6.6.1-g3cba94c761ec-dirty #15
[   75.457488] Hardware name: linux,dummy-virt (DT)
[   75.458119] Call trace:
[   75.458581]  dump_backtrace+0x90/0xe8
[   75.459060]  show_stack+0x18/0x24
[   75.459488]  dump_stack_lvl+0x48/0x60
[   75.459950]  print_report+0x318/0x5d8
[   75.460412]  kasan_report+0xc4/0x108
[   75.460872]  __asan_load1+0x60/0x6c
[   75.461068]  kasan_testcase_write+0x414/0x4d8 [kasan_driver]
[   75.461358]  vfs_write+0x158/0x45c
[   75.461550]  ksys_write+0xd0/0x180
[   75.461719]  __arm64_sys_write+0x44/0x58
[   75.461904]  invoke_syscall+0x60/0x184
[   75.462092]  el0_svc_common.constprop.0+0x78/0x13c
[   75.462328]  do_el0_svc+0x30/0x40
[   75.462500]  el0_svc+0x38/0x70
[   75.462816]  el0t_64_sync_handler+0x120/0x12c
[   75.463091]  el0t_64_sync+0x190/0x194
[   75.463336] 
[   75.463560] The buggy address belongs to stack of task sh/179
[   75.463929]  and is located at offset 142 in frame:
[   75.464205]  kasan_testcase_write+0x0/0x4d8 [kasan_driver]
[   75.464666] 
[   75.464913] This frame has 4 objects:
[   75.465338]  [48, 52) 'i'
[   75.465413]  [64, 72) 'array'
[   75.465635]  [96, 104) 'array'
[   75.465813]  [128, 138) 'stack_array'
[   75.465977] 
[   75.466241] The buggy address belongs to the virtual mapping at
[   75.466241]  [ffff8000873b0000, ffff8000873b9000) created by:
[   75.466241]  kernel_clone+0xb4/0x470
[   75.466756] 
[   75.466968] The buggy address belongs to the physical page:
[   75.467185] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4ca7c
[   75.467501] flags: 0x3fffc0000000000(node=0|zone=0|lastcpupid=0xffff)
[   75.467743] page_type: 0xffffffff()
[   75.467923] raw: 03fffc0000000000 0000000000000000 dead000000000122 0000000000000000
[   75.468199] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
[   75.468459] page dumped because: kasan: bad access detected
[   75.468660] 
[   75.468764] Memory state around the buggy address:
[   75.468955]  ffff8000873b7a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   75.469214]  ffff8000873b7a80: 00 00 f1 f1 f1 f1 f1 f1 04 f2 00 f2 f2 f2 00 f2
[   75.469478] >ffff8000873b7b00: f2 f2 00 02 f3 f3 00 00 00 00 00 00 00 00 00 00
[   75.469725]                             ^
[   75.469903]  ffff8000873b7b80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00
[   75.470158]  ffff8000873b7c00: 00 f2 f2 f2 f2 f2 00 00 00 00 00 00 f3 f3 f3 f3
[   75.470407] ==================================================================
[   75.470858] kasan_stack_oob  0
[   75.471036] kasan_test_case type 3

触发page use  after free
/dev # echo 4 > kasan_test 
[   80.572006] pagealloc_uaf ffff000004265000
[   80.572276] ==================================================================
[   80.573408] BUG: KASAN: use-after-free in kasan_testcase_write+0x288/0x4d8 [kasan_driver]
[   80.574439] Read of size 1 at addr ffff000004265000 by task sh/179
[   80.575262] 
[   80.575562] CPU: 1 PID: 179 Comm: sh Tainted: G    B            N 6.6.1-g3cba94c761ec-dirty #15
[   80.576651] Hardware name: linux,dummy-virt (DT)
[   80.577286] Call trace:
[   80.577887]  dump_backtrace+0x90/0xe8
[   80.578659]  show_stack+0x18/0x24
[   80.579220]  dump_stack_lvl+0x48/0x60
[   80.579548]  print_report+0xf8/0x5d8
[   80.579839]  kasan_report+0xc4/0x108
[   80.580055]  __asan_load1+0x60/0x6c
[   80.580236]  kasan_testcase_write+0x288/0x4d8 [kasan_driver]
[   80.580523]  vfs_write+0x158/0x45c
[   80.580706]  ksys_write+0xd0/0x180
[   80.580887]  __arm64_sys_write+0x44/0x58
[   80.581126]  invoke_syscall+0x60/0x184
[   80.581378]  el0_svc_common.constprop.0+0x78/0x13c
[   80.581653]  do_el0_svc+0x30/0x40
[   80.581893]  el0_svc+0x38/0x70
[   80.582130]  el0t_64_sync_handler+0x120/0x12c
[   80.582425]  el0t_64_sync+0x190/0x194
[   80.582701] 
[   80.582861] The buggy address belongs to the physical page:
[   80.583170] page:(____ptrval____) refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x44265
[   80.583687] flags: 0x3fffc0000000000(node=0|zone=0|lastcpupid=0xffff)
[   80.584071] page_type: 0xffffffff()
[   80.584354] raw: 03fffc0000000000 fffffc0000109988 ffff00006af4d758 0000000000000000
[   80.584774] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[   80.585195] page dumped because: kasan: bad access detected
[   80.585532] 
[   80.585697] Memory state around the buggy address:
[   80.586005]  ffff000004264f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   80.586408]  ffff000004264f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   80.586783] >ffff000004265000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   80.587041]                    ^
[   80.587203]  ffff000004265080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   80.587465]  ffff000004265100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
[   80.587716] ==================================================================
[   80.588370] pagealloc_uaf 204
[   80.588569] kasan_test_case type 4


触发vmalloc内存踩踏
/dev # echo 5 > kasan_test 
[   86.262697] vmalloc_oob ffff800085bf5000
[   86.262824] ==================================================================
[   86.263246] BUG: KASAN: vmalloc-out-of-bounds in kasan_testcase_write+0x47c/0x4d8 [kasan_driver]
[   86.263603] Read of size 1 at addr ffff800085bf5800 by task sh/179
[   86.263816] 
[   86.263932] CPU: 5 PID: 179 Comm: sh Tainted: G    B            N 6.6.1-g3cba94c761ec-dirty #15
[   86.264229] Hardware name: linux,dummy-virt (DT)
[   86.264395] Call trace:
[   86.264525]  dump_backtrace+0x90/0xe8
[   86.264706]  show_stack+0x18/0x24
[   86.264860]  dump_stack_lvl+0x48/0x60
[   86.265059]  print_report+0x318/0x5d8
[   86.265250]  kasan_report+0xc4/0x108
[   86.265434]  __asan_load1+0x60/0x6c
[   86.265627]  kasan_testcase_write+0x47c/0x4d8 [kasan_driver]
[   86.265921]  vfs_write+0x158/0x45c
[   86.266113]  ksys_write+0xd0/0x180
[   86.266287]  __arm64_sys_write+0x44/0x58
[   86.266476]  invoke_syscall+0x60/0x184
[   86.266672]  el0_svc_common.constprop.0+0x78/0x13c
[   86.266892]  do_el0_svc+0x30/0x40
[   86.267078]  el0_svc+0x38/0x70
[   86.267251]  el0t_64_sync_handler+0x120/0x12c
[   86.267456]  el0t_64_sync+0x190/0x194
[   86.267640] 
[   86.267757] The buggy address belongs to the virtual mapping at
[   86.267757]  [ffff800085bf5000, ffff800085bf7000) created by:
[   86.267757]  kasan_testcase_write+0x444/0x4d8 [kasan_driver]
[   86.268317] 
[   86.268428] The buggy address belongs to the physical page:
[   86.268644] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4cd8a
[   86.268963] flags: 0x3fffc0000000000(node=0|zone=0|lastcpupid=0xffff)
[   86.269271] page_type: 0xffffffff()
[   86.269461] raw: 03fffc0000000000 0000000000000000 dead000000000122 0000000000000000
[   86.269746] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
[   86.270008] page dumped because: kasan: bad access detected
[   86.270209] 
[   86.270316] Memory state around the buggy address:
[   86.270511]  ffff800085bf5700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   86.270771]  ffff800085bf5780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[   86.271035] >ffff800085bf5800: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[   86.271289]                    ^
[   86.271470]  ffff800085bf5880: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[   86.271748]  ffff800085bf5900: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
[   86.271997] ==================================================================
[   86.272484] vmalloc_oob 0
[   86.272779] kasan_test_case type 5

六、小结

kasan 的核心思想是简单的,复杂主要体现在编译器插桩实现(好在gcc/clang都已经支持了):所有分配内存的地方需要更新对应的shadow值,所有读写内存的地方需要插入针对访问地址和访问长度的shadow值检查。我们业务使用是比较简单的,工程应用上需要注意的一些点就是:

1、打开kasan后kernel镜像会变大,需要考虑boot分区的大小限制(需要预先留足空间)

2、bootloader引导时也需要注意物理地址划分,以前也遇到过将后面rootfs(ramdisk)覆盖导致无法启动的情况

3、默认情况下kasan report只在内核日志中打印一次(后续再触发也不会上报),大量机器测试时需要人力或者自动化脚本检查,出现问题时想查看上下文或者一些变量状态也不方便。实际业务中通常增加 cmdline: kasan.fault=panic,这样发生问题时能保存现场,测试/开发同事也能第一时间发现并分析。

参考资料:

KASAN实现原理

HWAddress Sanitizer | Android NDK | Android Developers

Arm Memory Tagging Extension (MTE) | Android NDK | Android Developers

Address Sanitizer | Android NDK | Android Developers

https://developer.android.google.cn/ndk/guides/memory-debug?hl=zh-cn

Kernel page table dump

你可能感兴趣的:(Linux调试技术,linux,kernel内存踩踏,kasan)