elf文件结构分析

elf简介

ELF(Executable and Linkable Format)是可执行与可链接格式的缩写,是linux下最重要的一种可执行文件格式。如想深入了解linux下的程序运行原理,对elf文件组织格式的了解是必不可少的。
网上有很多elf结构分析相关的文章,这里推荐一个pdf《ELF文件格式分析.pdf》,细节讲的比较系统,大家可以参考,可以到https://download.csdn.net/download/t3swing/10302876下载。

elf格式介绍

linux可以通过readelf命令来查看elf文件格式,elf格式是一个多级索引结构,基本结构如下图所示:
elf文件结构分析_第1张图片

有几个概念:

  • Section:一般翻译为“节”,很多文章都把“节”与“段”等同了,但一般说的“段”指的是segment。每个节都定义了具体独立的功能,elf链接的时候,多个目标(.o)文件的节会合到一起。elf通过节表来索引具体的节,每个节的解析方式都可能不一样,可以对照着文档解析与理解不同节的含义。
  • segment:段是节的集合,一个段包含多个同属性(读、写、执行属性)的节,一般用在elf加载运行过程中。segment会指定地址范围,程序执行时,落在该地址范围内的section会被加载到该段里面。至于节和段的地址是怎么生成的,这都是ld链接程序确定的,ld命令参数非常多,甚至可以自定义segment中包含的节,同一份代码,不同平台生成的elf段和节的对应关系都可能不一样。
  • Program:程序表,elf中主要用来管理segment。

注意:上述图只是elf的结构示意图,不代表具体位置,实际上,elf只规定了总头位置,其他的位置都是根据offset与size来确定的,图中也没有体现出segment与section的关系。

elf文件格式解析

可以尝试自己解析elf来加深对elf文件结构的理解,解析elf格式,结构体定义可以直接使用linux下的/usr/include/elf.h头文件,该头文件包含32位和64位的elf解析相关结构体。

  • linux下解析elf直接引用/usr/include/elf.h即可。
  • windows下,可以把/usr/include/elf.h复制过去,并替换基础变量名称定义即可。如下:
#include <features.h>
#include <stdint.h>

改成

/* Signed.  Assumes char/short/int/long long are 8/16/32/64 bits wide on the target compiler.  */
typedef signed char             int8_t;
typedef short int               int16_t;
typedef int                     int32_t;
typedef long long int           int64_t;

/* Unsigned.  */
typedef unsigned char           uint8_t;
typedef unsigned short int      uint16_t;
typedef unsigned int            uint32_t;
typedef unsigned long long int  uint64_t;

解析elf之前,需先了解一下elf头文件中的各种缩写,如下:

  • hdr、h:header的缩写,如Elf32_Ehdr、Elf32_Shdr、sh_name等;
  • e:elf缩写,如Ehdr,e_type中的e等;
  • s:section节的缩写,如Shdr、sh_size等;
  • p:Program程序的缩写,如Phdr、p_type等;
  • t :table表的缩写,elf是多级索引结构,通过表来完成;
  • sym:Symbol符号的缩写;
  • INTERP : interpreter解释器缩写;

elf头文件的命名习惯是用单个字母组合完成,如sht表示section header table即节头表,st表示symbol table即符号表,缩写有很多,elf.h的注释一般都解释了缩写代表什么意思。

对照着上节的结构体的结构图和elf.h很容易解析elf文件结构,有一个细节需注意,解析节时必须先找到.shstrtab这个包含节头名称字符串表的节,只有知道了各节头的名称,才能继续解析各个节的内容。字符串表(string table)有多个(如.shstrtab、.strtab、.dynstr等),不同情况下用不同的字符串解析,如节头名称不能用.strtab或者.dynstr字符串表去解析。

自己尝试解析elf文件,可以很快的掌握elf结构,比看文档快,熟悉结构前,不要对着二进制去看,效率太低。细节方面可以查看文档,下面是一个简单的elf解析程序,仿照readelf输出格式,主要解析了节头、段头及符号表部分信息,大家可以参考,源码如下:

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "elf.h"

/* enum_value stored in the terminating entry of the ENUM_S lookup tables. */
#define INVALID                 (-1)

/* Name of the string-table section that holds the section names. */
#define SECTION_NAME_SHSTRTAB   ".shstrtab"

/* Static symbol table and the string table used to resolve its symbol names. */
#define SECTION_NAME_SYMTAB     ".symtab"
#define SECTION_NAME_STRTAB     ".strtab"

/* Dynamic symbol table and the string table used to resolve its symbol names. */
#define SECTION_NAME_DYNSYM     ".dynsym"
#define SECTION_NAME_DYNSTR     ".dynstr"

/**
* One entry of a string <-> enum conversion table.
* Tables are arrays of these entries and end with an entry whose enum_str
* is NULL.
*/
typedef struct
{
    int enum_value;     /* numeric constant */
    char * enum_str;    /* display string; NULL marks the end of a table */
} ENUM_S;

/**
 * Look up the display string registered for an enum value.
 *
 * @param specs_enum  Table of value/string pairs, terminated by an entry
 *                    whose enum_str is NULL.
 * @param enum_value  Value to search for.
 * @return The enum_str of the first entry whose enum_value matches, or NULL
 *         when specs_enum is NULL or no entry matches (a diagnostic is
 *         printed in both cases).  Callers must check for NULL before
 *         handing the result to a "%s" conversion.
 */
char * enum2str(ENUM_S * specs_enum, int enum_value)
{
    if (NULL == specs_enum)
    {
        /* Print pointers with %p: casting to int truncates on 64-bit targets. */
        printf("param error! enum:%p\n", (void *)specs_enum);
        return NULL;
    }

    for (; specs_enum->enum_str != NULL; specs_enum++)
    {
        if (specs_enum->enum_value == enum_value)
        {
            return specs_enum->enum_str;
        }
    }
    printf("enum_value not match:%d \n", enum_value);

    return NULL;
}

/**
 * Look up the enum value registered for a display string.
 *
 * @param specs_enum  Table of value/string pairs, terminated by an entry
 *                    whose enum_str is NULL.
 * @param enum_str    String to search for (compared with strcmp).
 * @return The enum_value of the first entry whose string matches, or -1 when
 *         an argument is NULL or no entry matches (a diagnostic is printed
 *         in both cases).
 */
int str2enum(ENUM_S * specs_enum, const char * enum_str)
{
    if ((NULL == specs_enum) || (NULL == enum_str))
    {
        /* Print pointers with %p: casting to int truncates on 64-bit targets. */
        printf("param error! specs_enum:%p enum_str:%p\n",
               (void *)specs_enum, (const void *)enum_str);
        return -1;
    }

    for (; specs_enum->enum_str != NULL; specs_enum++)
    {
        if (0 == strcmp(specs_enum->enum_str, enum_str))
        {
            return specs_enum->enum_value;
        }
    }
    printf("enum_str not match:%s \n", enum_str);

    return -1;
}


/**
 * Read a whole file into a freshly allocated buffer.
 *
 * @param file  Path of the file to read.
 * @param size  Receives the number of bytes read; left untouched on failure.
 * @return Buffer holding the file contents (the caller frees it with free()),
 *         or NULL when an argument is NULL, the file cannot be opened or
 *         read, it is empty or larger than INT_MAX, or allocation fails.
 */
char * readFile(char * file, int * size)
{
    long fsize = 0;
    char * mem = NULL;
    FILE * fp = NULL;

    if ((file == NULL) || (size == NULL))
    {
        printf("readFile param error!\n");
        return NULL;
    }

    fp = fopen(file, "rb");
    if (fp == NULL)
    {
        /* Return directly: there is no stream to close on this path. */
        perror("fopen error!");
        return NULL;
    }

    if (fseek(fp, 0, SEEK_END) != 0)
    {
        perror("fseek error!");
        goto RELEASE;
    }
    fsize = ftell(fp);
    if (fsize < 0)
    {
        perror("ftell error!");
        goto RELEASE;
    }
    /* The length is reported through an int, and an empty file has nothing to parse. */
    if ((fsize == 0) || (fsize > INT_MAX))
    {
        printf("readFile unsupported file size:%ld\n", fsize);
        goto RELEASE;
    }

    mem = malloc((size_t)fsize);
    if (mem == NULL)
    {
        perror("malloc error!");
        goto RELEASE;
    }
    printf("readFile size:%d\n", (int)fsize);

    if ((fseek(fp, 0, SEEK_SET) != 0) ||
            (fread(mem, 1, (size_t)fsize, fp) != (size_t)fsize))
    {
        printf("readFile read error!\n");
        free(mem);
        mem = NULL;
        goto RELEASE;
    }
    *size = (int)fsize;

RELEASE:
    fclose(fp);

    return mem;
}

/**
 * Validate the ELF identification bytes and print the ELF header in a
 * readelf-like layout.
 *
 * @param header  Start of the file image, viewed as a 32-bit ELF header.
 * @return 0 on success; -1 when header is NULL, the magic number is wrong,
 *         or the file is not ELF32 (in that last case only the
 *         identification bytes are printed, because the remaining fields
 *         are read through the Elf32_Ehdr layout).
 */
int parseHeader(Elf32_Ehdr * header)
{
    int i = 0;
    const char * name = NULL;
    unsigned char * e_ident = NULL;

    /* magic num */
    ENUM_S e_class[] = { {ELFCLASS32, "ELF32"}, {ELFCLASS64, "ELF64"}, {INVALID, NULL}};
    ENUM_S e_data[] = { {ELFDATA2LSB, "little endian"}, {ELFDATA2MSB, "big endian"}, {INVALID, NULL}};
    ENUM_S e_osabi[] = { {ELFOSABI_NONE, "UNIX - System V"}, {ELFOSABI_ARM_AEABI, "ARM EABI"}, {ELFOSABI_ARM, "ARM"}, {INVALID, NULL}};

    /* elf header */
    ENUM_S e_type[] =
    {
        {ET_REL, "Relocatable file"},
        {ET_EXEC, "Executable file"},
        {ET_DYN, "Shared object file"},
        {ET_CORE, "Core file"},
        {INVALID, NULL}
    };
    ENUM_S e_machine[] = {{EM_386, "Intel 80386"}, {EM_ARM, "ARM"}, {INVALID, NULL}};

    if (header == NULL)
    {
        printf("parseHeader param error!\n");
        return -1;
    }
    e_ident = header->e_ident;

    /* All four magic bytes must match; the fourth one is EI_MAG3. */
    if ((e_ident[EI_MAG0] != ELFMAG0) || (e_ident[EI_MAG1] != ELFMAG1) ||
            (e_ident[EI_MAG2] != ELFMAG2) || (e_ident[EI_MAG3] != ELFMAG3))
    {
        printf("Not elf format!\n");
        return -1;
    }
    printf("ELF Header:\n\tMagic:");
    for (i = 0; i < EI_NIDENT; i++)
    {
        printf(" %02x", e_ident[i]);
    }
    printf("\n");

    /* enum2str() returns NULL for values missing from a table; never pass that to %s. */
    name = enum2str(e_class, e_ident[EI_CLASS]);
    printf("\tClass:                             %s\n", name ? name : "Unknown");
    name = enum2str(e_data, e_ident[EI_DATA]);
    printf("\tData:                              2's complement, %s\n", name ? name : "Unknown");
    printf("\tVersion:                           %d (current)\n", e_ident[EI_VERSION]);
    name = enum2str(e_osabi, e_ident[EI_OSABI]);
    printf("\tOS/ABI:                            %s\n", name ? name : "Unknown");
    printf("\tABI Version:                       %d\n", e_ident[EI_ABIVERSION]);

    /* Everything below is read through the 32-bit layout. */
    if (e_ident[EI_CLASS] != ELFCLASS32)
    {
        printf("Only ELF32 is supported!\n");
        return -1;
    }

    name = enum2str(e_type, header->e_type);
    printf("\tType:                              %s\n", name ? name : "Unknown");
    name = enum2str(e_machine, header->e_machine);
    printf("\tMachine:                           %s\n", name ? name : "Unknown");
    printf("\tVersion:                           0x%x\n", header->e_version);
    printf("\tEntry point address:               0x%x\n", header->e_entry);
    printf("\tStart of program headers:          %u (bytes into file)\n", header->e_phoff);
    printf("\tStart of section headers:          %u (bytes into file)\n", header->e_shoff);
    printf("\tFlags:                             0x%x\n", header->e_flags);
    printf("\tSize of this header:               %d (bytes)\n", header->e_ehsize);
    printf("\tSize of program headers:           %d (bytes)\n", header->e_phentsize);
    printf("\tNumber of program headers:         %d\n", header->e_phnum);
    printf("\tSize of section headers:           %d (bytes)\n", header->e_shentsize);
    printf("\tNumber of section headers:         %d\n", header->e_shnum);
    printf("\tSection header string table index: %d\n", header->e_shstrndx);

    return 0;
}

/**
 * Map a section header sh_type value to its display name.
 *
 * @param sh_type  Section type taken from a section header.
 * @return The name of the first table entry with that value, or NULL when
 *         enum2str() finds no match.
 */
char * getSectionTypeName(Elf32_Word sh_type)
{
    /* Order matters: enum2str() returns the first entry whose value matches. */
    ENUM_S typeNames[] =
    {
        {SHT_NULL,           "NULL"},
        {SHT_PROGBITS,       "PROGBITS"},
        {SHT_SYMTAB,         "SYMTAB"},
        {SHT_STRTAB,         "STRTAB"},
        {SHT_RELA,           "RELA"},
        {SHT_HASH,           "HASH"},
        {SHT_DYNAMIC,        "DYNAMIC"},
        {SHT_NOTE,           "NOTE"},
        {SHT_NOBITS,         "NOBITS"},
        {SHT_REL,            "REL"},
        {SHT_SHLIB,          "SHLIB"},
        {SHT_DYNSYM,         "DYNSYM"},
        {SHT_INIT_ARRAY,     "INIT_ARRAY"},
        {SHT_FINI_ARRAY,     "FINI_ARRAY"},
        {SHT_PREINIT_ARRAY,  "PREINIT_ARRAY"},
        {SHT_GROUP,          "GROUP"},
        {SHT_SYMTAB_SHNDX,   "SYMTAB_SHNDX"},
        {SHT_NUM,            "NUM"},
        {SHT_LOOS,           "LOOS"},
        {SHT_GNU_ATTRIBUTES, "GNU_ATTRIBUTES"},
        {SHT_GNU_HASH,       "GNU_HASH"},
        {SHT_GNU_LIBLIST,    "GNU_LIBLIST"},
        {SHT_CHECKSUM,       "CHECKSUM"},
        {SHT_LOSUNW,         "LOSUNW"},
        {SHT_SUNW_move,      "SUNW_move"},
        {SHT_SUNW_COMDAT,    "SUNW_COMDAT"},
        {SHT_SUNW_syminfo,   "SUNW_syminfo"},
        {SHT_GNU_verdef,     "GNU_verdef"},
        {SHT_GNU_verneed,    "GNU_verneed"},
        {SHT_GNU_versym,     "GNU_versym"},
        {SHT_HISUNW,         "HISUNW"},
        {SHT_HIOS,           "HIOS"},
        {SHT_LOPROC,         "LOPROC"},
        {SHT_HIPROC,         "HIPROC"},
        {SHT_LOUSER,         "LOUSER"},
        {SHT_HIUSER,         "HIUSER"},
        {INVALID,            NULL}
    };
    int wanted = (int)sh_type;

    return enum2str(typeNames, wanted);
}

/**
 * Resolve an offset inside a string-table section to the string stored there.
 *
 * @param ehdr       Start of the file image.
 * @param strtabhdr  Section header of the string table to read from.
 * @param idx        Byte offset of the string inside that table.
 * @return Pointer into the file image at the requested string, or NULL when
 *         idx is not below the table's sh_size.
 */
char * getStrTabStr(Elf32_Ehdr * ehdr, Elf32_Shdr * strtabhdr, int idx)
{
    char * tableStart = NULL;

    /* Offsets at or past the end of the table cannot name a string. */
    if (idx >= strtabhdr->sh_size)
    {
        return NULL;
    }

    tableStart = (char *)ehdr + strtabhdr->sh_offset;

    return tableStart + idx;
}

/**
 * Locate the section header of the section-name string table.
 *
 * The ELF header's e_shstrndx field is the authoritative index of that
 * table, so it is tried first.  When it does not point at a SHT_STRTAB
 * section, the function falls back to scanning for a string table whose own
 * name, looked up inside itself, is ".shstrtab".
 *
 * @param ehdr  Start of the file image.
 * @return The matching section header inside the file image, or NULL when
 *         none is found.
 */
Elf32_Shdr * getSHStrTab(Elf32_Ehdr * ehdr)
{
    int i = 0, shnum = ehdr->e_shnum;
    char * nameStr = NULL;
    Elf32_Shdr * first = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff);
    Elf32_Shdr * shdr = NULL;

    /* An index outside the table (including an empty table) is ignored. */
    if ((ehdr->e_shstrndx < shnum) && (first[ehdr->e_shstrndx].sh_type == SHT_STRTAB))
    {
        return &first[ehdr->e_shstrndx];
    }

    /* Fallback: identify the table by its conventional name. */
    for (i = 0, shdr = first; i < shnum; i++, shdr++)
    {
        if (shdr->sh_type == SHT_STRTAB)
        {
            /* The name offset must lie inside this table to be comparable. */
            if (shdr->sh_name >= shdr->sh_size)
            {
                continue;
            }

            nameStr = (char *)ehdr + shdr->sh_offset + shdr->sh_name;
            if (strcmp(nameStr, SECTION_NAME_SHSTRTAB) == 0)
            {
                return shdr;
            }
        }
    }

    return NULL;
}

/**
 * Find a section header by section name and type.
 *
 * @param ehdr     Start of the file image.
 * @param name     Section name to match (compared with strcmp).
 * @param sh_type  Section type the header must have.
 * @return The first section header with that type and name, or NULL when
 *         the section-name string table or the section cannot be found
 *         (a diagnostic is printed in both cases).
 */
Elf32_Shdr * getSHdrByName(Elf32_Ehdr * ehdr, const char * name, Elf32_Word sh_type)
{
    int idx = 0;
    int count = ehdr->e_shnum;
    char * candidate = NULL;
    Elf32_Shdr * table = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff);
    Elf32_Shdr * names = getSHStrTab(ehdr);

    if (names == NULL)
    {
        printf("getSHdrByName Get shstrtabhdr failed name:%s sh_type:%d\n", name, sh_type);
        return NULL;
    }

    for (idx = 0; idx < count; idx++)
    {
        Elf32_Shdr * cur = &table[idx];

        /* Only sections of the requested type are candidates. */
        if (cur->sh_type != sh_type)
        {
            continue;
        }

        candidate = getStrTabStr(ehdr, names, cur->sh_name);
        if (candidate == NULL)
        {
            continue;
        }
        if (strcmp(candidate, name) == 0)
        {
            return cur;
        }
    }

    printf("getSHdrByName Get section header failed name:%s sh_type:%d\n", name, sh_type);
    return NULL;
}

/**
 * Print the section header table in a readelf-like layout.
 *
 * @param ehdr  Start of the file image.
 * @return 0 on success, -1 when the section-name string table cannot be
 *         located.
 */
int parseSectionHeader(Elf32_Ehdr * ehdr)
{
    int i = 0, shnum = ehdr->e_shnum;
    const char * name = NULL;
    const char * typeName = NULL;
    Elf32_Shdr * shdr = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff);
    Elf32_Shdr * shstrtabhdr = NULL;

    shstrtabhdr = getSHStrTab(ehdr);
    if (shstrtabhdr == NULL)
    {
        printf("parseSectionHeader Get shstrtabhdr failed\n");
        return -1;
    }

    /* The offset follows a "0x" prefix, so it has to be printed in hexadecimal. */
    printf("\nThere are %d section headers, starting at offset 0x%x:\n", shnum, ehdr->e_shoff);
    printf("Section Headers:\n");
    printf("  [Nr] Name                     Type            Addr     Off    Size   ES Flg Lk Inf Al\n");
    for (i = 0; i < shnum; i++, shdr++)
    {
        printf("  [%2d] ", i);

        /* Both lookups may return NULL; never pass that to %s. */
        name = getStrTabStr(ehdr, shstrtabhdr, shdr->sh_name);
        printf("%-24s ", name ? name : "<invalid>");
        typeName = getSectionTypeName(shdr->sh_type);
        printf("%-15s ", typeName ? typeName : "UNKNOWN");

        printf("%08x ", shdr->sh_addr);
        printf("%06x ", shdr->sh_offset);
        printf("%06x ", shdr->sh_size);
        printf("%02x ", shdr->sh_entsize);
        printf("%03x ", shdr->sh_flags);
        printf("%02x ", shdr->sh_link);
        printf("%03x ", shdr->sh_info);
        printf("%02x ", shdr->sh_addralign);
        printf("\n");
    }

    return 0;
}

/**
 * Print a symbol table in a readelf-like layout.
 *
 * To parse either the static or the dynamic symbol table the caller supplies
 * the symbol section's name and type plus the name of the string-table
 * section that holds its symbol names.
 *
 * @param ehdr     Start of the file image.
 * @param symtab   Name of the symbol-table section.
 * @param strtab   Name of the string-table section used for symbol names.
 * @param sh_type  Section type of the symbol table.
 * @return 0 on success, -1 when either section cannot be found.
 */
int parseSymTab(Elf32_Ehdr * ehdr, char * symtab, char * strtab, Elf32_Word sh_type)
{
    int i = 0, sym_cnt = 0;
    const char * text = NULL;
    Elf32_Shdr * shdrStrTab = NULL;
    Elf32_Shdr * shdrSymTab = NULL;
    Elf32_Sym * sym = NULL;

    /* Elf32_Sym st_info_binding */
    ENUM_S st_info_bind[] =
    {
        {STB_LOCAL, "LOCAL"},
        {STB_GLOBAL, "GLOBAL"},
        {STB_WEAK, "WEAK"},
        {STB_NUM, "NUM"},
        {STB_HIOS, "HIOS"},
        {STB_LOPROC, "LOPROC"},
        {STB_HIPROC, "HIPROC"},
        {INVALID, NULL}
    };
    /* Elf32_Sym st_info_type */
    ENUM_S st_info_type[] =
    {
        {STT_NOTYPE, "NOTYPE"},
        {STT_OBJECT, "OBJECT"},
        {STT_FUNC, "FUNC"},
        {STT_SECTION, "SECTION"},
        {STT_FILE, "FILE"},
        {STT_COMMON, "COMMON"},
        {STT_TLS, "TLS"},
        {STT_NUM, "NUM"},
        {INVALID, NULL}
    };
    /* Elf32_Sym visibility */
    ENUM_S st_other_visibility[] =
    {
        {STV_DEFAULT, "DEFAULT"},
        {STV_INTERNAL, "INTERNAL"},
        {STV_HIDDEN, "HIDDEN"},
        {STV_PROTECTED, "PROTECTED"},
        {INVALID, NULL}
    };

    shdrSymTab = getSHdrByName(ehdr, symtab, sh_type);
    if (shdrSymTab == NULL)
    {
        printf("parseSymTab getSymTab faild\n");
        return -1;
    }
    shdrStrTab = getSHdrByName(ehdr, strtab, SHT_STRTAB);
    if (shdrStrTab == NULL)
    {
        printf("parseSymTab getStrTab faild\n");
        return -1;
    }
    sym_cnt = (int)(shdrSymTab->sh_size / sizeof(Elf32_Sym));
    printf("\nSymbol table '%s' contains %d entries:\n", symtab, sym_cnt);
    printf("   Num:    Value  Size Type    Bind   Vis      Ndx   Name\n");

    sym = (Elf32_Sym *)((char *)ehdr + shdrSymTab->sh_offset);
    for (i = 0; i < sym_cnt; i++, sym++)
    {
        printf("%6d: ", i);
        printf("%08x ", sym->st_value);
        printf("%5u ", sym->st_size);

        /*
         * Types, bindings and visibilities that are absent from the tables,
         * and name offsets outside the string table, yield NULL; never pass
         * that to %s.
         */
        text = enum2str(st_info_type, ELF32_ST_TYPE(sym->st_info));
        printf("%-7s ", text ? text : "UNKNOWN");
        text = enum2str(st_info_bind, ELF32_ST_BIND(sym->st_info));
        printf("%-6s ", text ? text : "UNKNOWN");
        text = enum2str(st_other_visibility, ELF32_ST_VISIBILITY(sym->st_other));
        printf("%-8s ", text ? text : "UNKNOWN");
        printf("%5d ", sym->st_shndx);
        text = getStrTabStr(ehdr, shdrStrTab, sym->st_name);
        printf("%-32s", text ? text : "<invalid>");
        printf("\n");
    }

    return 0;
}

/**
 * Print the program header table in a readelf-like layout.
 *
 * @param ehdr  Start of the file image.
 * @return Always 0.
 */
int parseProgramHeader(Elf32_Ehdr * ehdr)
{
    int i = 0, phnum = ehdr->e_phnum;
    const char * text = NULL;
    Elf32_Phdr * phdr = (Elf32_Phdr *)((char *)ehdr + ehdr->e_phoff);
    ENUM_S p_type[] =
    {
        {PT_NULL, "NULL"},
        {PT_LOAD, "LOAD"},
        {PT_DYNAMIC, "DYNAMIC"},
        {PT_INTERP, "INTERP"},
        {PT_NOTE, "NOTE"},
        {PT_SHLIB, "SHLIB"},
        {PT_PHDR, "PHDR"},
        {PT_TLS, "TLS"},
        {PT_GNU_EH_FRAME, "GNU_EH_FRAME"},
        {PT_GNU_STACK, "GNU_STACK"},
        {PT_GNU_RELRO, "GNU_RELRO"},
        {INVALID, NULL},
    };
    ENUM_S p_flags[] =
    {
        {PF_X, "X"},
        {PF_W, "W"},
        {PF_R, "R"},
        {PF_X | PF_W, "WE"},
        {PF_X | PF_R, "RE"},
        {PF_W | PF_R, "RW"},
        {PF_X | PF_W | PF_R, "RWE"},
        {INVALID, NULL},
    };

    printf("\nProgram Headers:\n");
    printf("  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align\n");
    for (i = 0; i < phnum; i++, phdr++)
    {
        /* Segment types missing from the table yield NULL; never pass that to %s. */
        text = enum2str(p_type, phdr->p_type);
        printf("  %-12s ", text ? text : "UNKNOWN");
        printf("0x%08x ", phdr->p_offset);
        printf("0x%08x ", phdr->p_vaddr);
        printf("0x%08x ", phdr->p_paddr);
        printf("0x%08x ", phdr->p_filesz);
        printf("0x%08x ", phdr->p_memsz);

        /* Same for flag combinations that are not listed (for example no flags at all). */
        text = enum2str(p_flags, phdr->p_flags);
        printf("%-3s ", text ? text : "");
        printf("%#04x ", phdr->p_align);
        printf("\n");

        /* For an interpreter segment, also print the interpreter path it holds. */
        if (phdr->p_type == PT_INTERP)
        {
            printf("    [Requesting program interpreter:%s]\n", (char *)ehdr + phdr->p_offset);
        }
    }

    return 0;
}

/**
 * Print, for every segment, the names of the sections that lie inside it.
 *
 * Sections flagged SHF_ALLOC are matched by address range against the
 * segment's virtual address and memory size; all other sections are matched
 * by file offset against the segment's file offset and file size.
 *
 * @param ehdr  Start of the file image.
 * @return 0 on success, -1 when the section-name string table cannot be
 *         located.
 */
int parseSegment(Elf32_Ehdr * ehdr)
{
    int i = 0, j = 0;
    const char * name = NULL;
    unsigned long long secStart = 0, secEnd = 0;
    unsigned long long segStart = 0, segEnd = 0;
    Elf32_Shdr * shdr = NULL;
    Elf32_Phdr * phdr = (Elf32_Phdr *)((char *)ehdr + ehdr->e_phoff);
    Elf32_Shdr * shstrtabhdr = NULL;

    shstrtabhdr = getSHStrTab(ehdr);
    if (shstrtabhdr == NULL)
    {
        printf("parseSegment Get shstrtabhdr failed\n");
        return -1;
    }

    printf("\nSection to Segment mapping:\n");
    printf("  Segment Sections...\n");

    for (i = 0; i < ehdr->e_phnum; i++, phdr++)
    {
        printf("   %2.2d     ", i);

        /*
         * The index starts at 1 to skip entry 0, so the pointer must start at
         * entry 1 as well; otherwise the last section is never examined.
         */
        shdr = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff) + 1;
        for (j = 1; j < ehdr->e_shnum; j++, shdr++)
        {
            if (shdr->sh_size == 0)
            {
                continue;
            }

            /* Ranges are computed in 64 bits so start + size cannot wrap. */
            if (shdr->sh_flags & SHF_ALLOC)
            {
                secStart = shdr->sh_addr;
                segStart = phdr->p_vaddr;
                segEnd = segStart + phdr->p_memsz;
            }
            else
            {
                secStart = shdr->sh_offset;
                segStart = phdr->p_offset;
                segEnd = segStart + phdr->p_filesz;
            }
            secEnd = secStart + shdr->sh_size;

            /* A section belongs to the segment only when it lies completely inside it. */
            if ((secStart < segStart) || (secEnd > segEnd))
            {
                continue;
            }

            name = getStrTabStr(ehdr, shstrtabhdr, shdr->sh_name);
            printf("%s ", name ? name : "<invalid>");
        }
        printf("\n");
    }

    return 0;
}

int parseFile(char * mem, int size)
{
    int ret = 0;
    Elf32_Ehdr * ehdr = (Elf32_Ehdr *)mem;

    /* 解析elf总头 */
    parseHeader(ehdr);

    /* 解析section总头 */
    parseSectionHeader(ehdr);

    /* 解析.symtab与.dynsym */
    parseSymTab(ehdr,SECTION_NAME_SYMTAB,SECTION_NAME_STRTAB,SHT_SYMTAB);
    parseSymTab(ehdr,SECTION_NAME_DYNSYM,SECTION_NAME_DYNSTR,SHT_DYNSYM);

    /* 解析程序头与segment信息 */
    parseProgramHeader(ehdr);
    parseSegment(ehdr);

    return 0;
}

/**
 * Entry point: read the ELF file named by the first argument and print its
 * structure.
 *
 * @return -1 when no file is given or the file cannot be loaded, otherwise
 *         the result of parseFile().
 */
int main(int argc, char * argv[])
{
    int size = 0;
    int ret = 0;
    char * mem = NULL;

    if (argc < 2)
    {
        printf("./readelf elf\n");
        return -1;
    }

    mem = readFile(argv[1], &size);
    if (mem == NULL)
    {
        /* Nothing to parse; handing NULL to parseFile would dereference it. */
        return -1;
    }

    ret = parseFile(mem, size);
    free(mem);

    return ret;
}

你可能感兴趣的:(linux)