ELF是Executable and Linkable Format(可执行与可链接格式)的缩写,是linux下最重要的一种可执行文件格式。如想深入了解linux下的程序运行原理,对elf文件组织格式的了解是必不可少的。
网上有很多elf结构分析相关的文章,这里推荐一个pdf《ELF文件格式分析.pdf》,细节讲的比较系统,大家可以参考,可以到https://download.csdn.net/download/t3swing/10302876下载。
linux下可以通过readelf命令来查看elf文件的结构。elf格式是一个多级索引结构,基本结构如下图所示:
有几个概念:
注意:上述图只是elf的结构示意图,不代表具体位置,实际上,elf只规定了总头位置,其他的位置都是根据offset与size来确定的,图中也没有体现出segment与section的关系。
可以尝试自己解析elf来加深对elf文件结构的理解,解析elf格式,结构体定义可以直接使用linux下的/usr/include/elf.h头文件,该头文件包含32位和64位的elf解析相关结构体。
如果编译环境中没有这些系统头文件,可将elf.h中的
#include <features.h>
#include <stdint.h>
改成
/*
 * Hand-written replacements for the fixed-width integer aliases used by elf.h.
 * NOTE(review): the widths rely on char/short/int/long long being
 * 8/16/32/64 bits on the target compiler -- confirm before reusing elsewhere.
 */
/* Signed. */
typedef signed char int8_t;
typedef short int int16_t;
typedef int int32_t;
typedef long long int int64_t;
/* Unsigned. */
typedef unsigned char uint8_t;
typedef unsigned short int uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
解析elf之前,需先了解一下elf头文件中的各种缩写,如下:
elf头文件的命名习惯是用单词首字母组合成缩写,如sht表示section header table即节头表,st表示symbol table即符号表。缩写有很多,elf.h的注释一般都解释了缩写代表什么意思。
对照着上节的结构图和elf.h很容易解析elf文件结构。有一个细节需注意:解析节时必须先找到.shstrtab这个包含节头名称字符串表的节,只有知道了各节头的名称,才能继续解析各个节的内容。字符串表(string table)有多个(如.shstrtab、.strtab、.dynstr等),不同情况下要用不同的字符串表解析,如节头名称不能用.strtab或者.dynstr字符串表去解析。
自己尝试解析elf文件,可以很快的掌握elf结构,比看文档快,熟悉结构前,不要对着二进制去看,效率太低。细节方面可以查看文档,下面是一个简单的elf解析程序,仿照readelf输出格式,主要解析了节头、段头及符号表部分信息,大家可以参考,源码如下:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "elf.h"
/* Placeholder enum_value used on the terminating row of ENUM_S lookup tables. */
#define INVALID (-1)
/* Names of the sections this program looks up. */
#define SECTION_NAME_SHSTRTAB ".shstrtab"
#define SECTION_NAME_SYMTAB ".symtab"
#define SECTION_NAME_STRTAB ".strtab"
#define SECTION_NAME_DYNSYM ".dynsym"
#define SECTION_NAME_DYNSTR ".dynstr"
/**
 * One row of an enum <-> string lookup table.
 *
 * A table is an array of ENUM_S terminated by a row whose enum_str is NULL;
 * the enum_value of that terminating row is never compared.
 */
typedef struct
{
    int enum_value;     /* numeric value of the constant */
    char * enum_str;    /* printable name; NULL marks the end of the table */
} ENUM_S;

/**
 * Translate a numeric value into its name.
 *
 * @param specs_enum  NULL-terminated lookup table (see ENUM_S)
 * @param enum_value  value to look up
 * @return            the name stored in the first row whose value matches,
 *                    or NULL (after printing a diagnostic) when the table is
 *                    NULL or no row matches
 */
char * enum2str(ENUM_S * specs_enum, int enum_value)
{
    if (NULL == specs_enum)
    {
        /* %p instead of an int cast: a pointer does not fit in int on LP64. */
        printf("param error! enum:%p\n", (void *)specs_enum);
        return NULL;
    }

    for (; specs_enum->enum_str != NULL; specs_enum++)
    {
        if (specs_enum->enum_value == enum_value)
        {
            return specs_enum->enum_str;
        }
    }

    printf("enum_value not match:%d \n", enum_value);
    return NULL;
}

/**
 * Translate a name into its numeric value.
 *
 * @param specs_enum  NULL-terminated lookup table (see ENUM_S)
 * @param enum_str    name to look up (compared with strcmp)
 * @return            the value of the first row whose name matches, or -1
 *                    (after printing a diagnostic) when an argument is NULL
 *                    or no row matches
 */
int str2enum(ENUM_S * specs_enum, const char * enum_str)
{
    if ((NULL == specs_enum) || (NULL == enum_str))
    {
        printf("param error! specs_enum:%p enum_str:%p\n",
               (void *)specs_enum, (const void *)enum_str);
        return -1;
    }

    for (; specs_enum->enum_str != NULL; specs_enum++)
    {
        if (0 == strcmp(specs_enum->enum_str, enum_str))
        {
            return specs_enum->enum_value;
        }
    }

    printf("enum_str not match:%s \n", enum_str);
    return -1;
}
/**
 * Read a whole file into a freshly allocated buffer.
 *
 * @param file  path of the file to read
 * @param size  out: receives the number of bytes read; left untouched when
 *              the function fails
 * @return      malloc'ed buffer with the file contents (the caller frees it),
 *              or NULL when an argument is NULL or the file cannot be opened,
 *              measured, allocated or read completely
 */
char * readFile(char * file, int * size)
{
    long fsize = 0;
    char * mem = NULL;
    FILE * fp = NULL;

    if ((file == NULL) || (size == NULL))
    {
        printf("readFile param error!\n");
        return NULL;
    }

    fp = fopen(file, "rb");
    if (fp == NULL)
    {
        perror("fopen error!");
        /* Return directly: there is no stream to close, and fclose(NULL) is undefined. */
        return NULL;
    }

    if (fseek(fp, 0, SEEK_END) != 0)
    {
        perror("fseek error!");
        goto RELEASE;
    }
    fsize = ftell(fp);
    if (fsize < 0)
    {
        perror("ftell error!");
        goto RELEASE;
    }
    if (fsize > INT_MAX)
    {
        /* The length is reported through an int, so larger files cannot be represented. */
        printf("readFile file too large!\n");
        goto RELEASE;
    }

    /* Ask for at least one byte so an empty file still yields a non-NULL buffer. */
    mem = malloc(fsize > 0 ? (size_t)fsize : 1);
    if (mem == NULL)
    {
        perror("malloc error!");
        goto RELEASE;
    }
    printf("readFile size:%d\n", (int)fsize);

    if ((fseek(fp, 0, SEEK_SET) != 0) ||
        ((fsize > 0) && (fread(mem, (size_t)fsize, 1, fp) != 1)))
    {
        perror("fread error!");
        free(mem);
        mem = NULL;
        goto RELEASE;
    }
    *size = (int)fsize;

RELEASE:
    fclose(fp);
    return mem;
}
/* Look value up in table; substitute a placeholder when it is not listed so
 * the result is always safe to pass to printf("%s"). */
static const char * ehdrFieldName(ENUM_S * table, int value)
{
    const char * name = enum2str(table, value);
    return (name != NULL) ? name : "<unknown>";
}

/**
 * Validate the ELF magic and print the ELF header in a readelf-like layout.
 *
 * @param header  start of the file image, interpreted as a 32-bit ELF header
 * @return        0 on success, -1 when header is NULL or the four magic bytes
 *                do not match
 */
int parseHeader(Elf32_Ehdr * header)
{
    int i = 0;
    unsigned char * e_ident = NULL;
    /* magic num */
    ENUM_S e_class[] = { {ELFCLASS32, "ELF32"}, {ELFCLASS64, "ELF64"}, {INVALID, NULL}};
    ENUM_S e_data[] = { {ELFDATA2LSB, "little endian"}, {ELFDATA2MSB, "big endian"}, {INVALID, NULL}};
    ENUM_S e_osabi[] = { {ELFOSABI_NONE, "UNIX - System V"}, {ELFOSABI_ARM_AEABI, "ARM EABI"}, {ELFOSABI_ARM, "ARM"}, {INVALID, NULL}};
    /* elf header */
    ENUM_S e_type[] =
    {
        {ET_REL, "Relocatable file"},
        {ET_EXEC, "Executable file"},
        {ET_DYN, "Shared object file"},
        {ET_CORE, "Core file"},
        {INVALID, NULL}
    };
    ENUM_S e_machine[] = {{EM_386, "Intel 80386"}, {EM_ARM, "ARM"}, {INVALID, NULL}};

    if (header == NULL)
    {
        printf("Not elf format!\n");
        return -1;
    }
    e_ident = header->e_ident;

    /* All four magic bytes must match, including the fourth one (EI_MAG3). */
    if ((e_ident[EI_MAG0] != ELFMAG0) || (e_ident[EI_MAG1] != ELFMAG1) ||
        (e_ident[EI_MAG2] != ELFMAG2) || (e_ident[EI_MAG3] != ELFMAG3))
    {
        printf("Not elf format!\n");
        return -1;
    }

    printf("ELF Header:\n\tMagic:");
    for (i = 0; i < EI_NIDENT; i++)
    {
        printf(" %02x", e_ident[i]);
    }
    printf("\n");
    printf("\tClass: %s\n", ehdrFieldName(e_class, e_ident[EI_CLASS]));
    printf("\tData: 2's complement, %s\n", ehdrFieldName(e_data, e_ident[EI_DATA]));
    printf("\tVersion: %d (current)\n", e_ident[EI_VERSION]);
    printf("\tOS/ABI: %s\n", ehdrFieldName(e_osabi, e_ident[EI_OSABI]));
    printf("\tABI Version: %d\n", e_ident[EI_ABIVERSION]);
    printf("\tType: %s\n", ehdrFieldName(e_type, header->e_type));
    printf("\tMachine: %s\n", ehdrFieldName(e_machine, header->e_machine));
    printf("\tVersion: 0x%x\n", (unsigned int)header->e_version);
    printf("\tEntry point address: 0x%x\n", (unsigned int)header->e_entry);
    /* File offsets are unsigned; %u keeps large offsets from printing as negative. */
    printf("\tStart of program headers: %u (bytes into file)\n", (unsigned int)header->e_phoff);
    printf("\tStart of section headers: %u (bytes into file)\n", (unsigned int)header->e_shoff);
    printf("\tFlags: 0x%x\n", (unsigned int)header->e_flags);
    printf("\tSize of this header: %d (bytes)\n", header->e_ehsize);
    printf("\tSize of program headers: %d (bytes)\n", header->e_phentsize);
    printf("\tNumber of program headers: %d\n", header->e_phnum);
    printf("\tSize of section headers: %d (bytes)\n", header->e_shentsize);
    printf("\tNumber of section headers: %d\n", header->e_shnum);
    printf("\tSection header string table index: %d\n", header->e_shstrndx);
    return 0;
}
/**
 * Map a section header type (sh_type) to a short printable name.
 *
 * @param sh_type  the sh_type field of a section header
 * @return         the name of the first matching table row, or NULL when the
 *                 type is not listed (enum2str prints a diagnostic then)
 */
char * getSectionTypeName(Elf32_Word sh_type)
{
    /*
     * The first matching row wins, so concrete section types are listed ahead
     * of the range markers (LOSUNW/HISUNW/HIOS) that share their value;
     * otherwise e.g. "SUNW_move" could never be returned.
     * The table is static so it is not rebuilt on every call.
     */
    static ENUM_S sh_type_list[] =
    {
        {SHT_NULL, "NULL"},
        {SHT_PROGBITS, "PROGBITS"},
        {SHT_SYMTAB, "SYMTAB"},
        {SHT_STRTAB, "STRTAB"},
        {SHT_RELA, "RELA"},
        {SHT_HASH, "HASH"},
        {SHT_DYNAMIC, "DYNAMIC"},
        {SHT_NOTE, "NOTE"},
        {SHT_NOBITS, "NOBITS"},
        {SHT_REL, "REL"},
        {SHT_SHLIB, "SHLIB"},
        {SHT_DYNSYM, "DYNSYM"},
        {SHT_INIT_ARRAY, "INIT_ARRAY"},
        {SHT_FINI_ARRAY, "FINI_ARRAY"},
        {SHT_PREINIT_ARRAY, "PREINIT_ARRAY"},
        {SHT_GROUP, "GROUP"},
        {SHT_SYMTAB_SHNDX, "SYMTAB_SHNDX"},
        {SHT_NUM, "NUM"},
        {SHT_LOOS, "LOOS"},
        {SHT_GNU_ATTRIBUTES, "GNU_ATTRIBUTES"},
        {SHT_GNU_HASH, "GNU_HASH"},
        {SHT_GNU_LIBLIST, "GNU_LIBLIST"},
        {SHT_CHECKSUM, "CHECKSUM"},
        {SHT_SUNW_move, "SUNW_move"},
        {SHT_SUNW_COMDAT, "SUNW_COMDAT"},
        {SHT_SUNW_syminfo, "SUNW_syminfo"},
        {SHT_GNU_verdef, "GNU_verdef"},
        {SHT_GNU_verneed, "GNU_verneed"},
        {SHT_GNU_versym, "GNU_versym"},
        /* Range markers: kept for completeness, reached only if their value is not claimed above. */
        {SHT_LOSUNW, "LOSUNW"},
        {SHT_HISUNW, "HISUNW"},
        {SHT_HIOS, "HIOS"},
        {SHT_LOPROC, "LOPROC"},
        {SHT_HIPROC, "HIPROC"},
        {SHT_LOUSER, "LOUSER"},
        {SHT_HIUSER, "HIUSER"},
        {INVALID, NULL}
    };

    return enum2str(sh_type_list, (int)sh_type);
}
/**
 * Fetch the string that starts at a given offset inside a string-table section.
 *
 * @param ehdr       start of the in-memory file image
 * @param strtabhdr  section header of the string table to read from
 * @param idx        byte offset of the string inside that section
 * @return           pointer into the file image at sh_offset + idx, or NULL
 *                   when idx is not below the section's sh_size
 */
char * getStrTabStr(Elf32_Ehdr * ehdr, Elf32_Shdr * strtabhdr, int idx)
{
    char * fileBase = (char *)ehdr;

    /* Anything that does not compare below sh_size has no string; reject it
     * before forming the address. */
    if (!(idx < strtabhdr->sh_size))
    {
        return NULL;
    }

    return fileBase + strtabhdr->sh_offset + idx;
}
/**
 * Locate the section-name string table (normally named ".shstrtab").
 *
 * The ELF header records which section holds the section names in
 * e_shstrndx, so that index is used directly instead of guessing from
 * section contents.
 *
 * @param ehdr  start of the in-memory file image
 * @return      header of the section-name string table, or NULL when ehdr is
 *              NULL, the file has no section headers, the index is undefined
 *              or out of range, or the indexed section is not a string table
 */
Elf32_Shdr * getSHStrTab(Elf32_Ehdr * ehdr)
{
    Elf32_Shdr * shdrTable = NULL;
    Elf32_Shdr * strtab = NULL;
    unsigned int shnum = 0;
    unsigned int idx = 0;

    if ((ehdr == NULL) || (ehdr->e_shoff == 0) || (ehdr->e_shnum == 0))
    {
        return NULL;
    }

    shdrTable = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff);
    shnum = ehdr->e_shnum;
    idx = ehdr->e_shstrndx;

    /* SHN_XINDEX means the real index did not fit in e_shstrndx and is stored
     * in the sh_link field of section header 0. */
    if (idx == SHN_XINDEX)
    {
        idx = shdrTable[0].sh_link;
    }

    if ((idx == SHN_UNDEF) || (idx >= shnum))
    {
        return NULL;
    }

    strtab = &shdrTable[idx];
    if (strtab->sh_type != SHT_STRTAB)
    {
        return NULL;
    }
    return strtab;
}
/**
 * Find a section header by type and name.
 *
 * @param ehdr     start of the in-memory file image
 * @param name     section name to match
 * @param sh_type  section type the header must have
 * @return         the first header whose type equals sh_type and whose name
 *                 equals name, or NULL (after printing a diagnostic) when the
 *                 section-name string table or the section cannot be found
 */
Elf32_Shdr * getSHdrByName(Elf32_Ehdr * ehdr, const char * name, Elf32_Word sh_type)
{
    int remaining = ehdr->e_shnum;
    Elf32_Shdr * cur = NULL;
    Elf32_Shdr * names = getSHStrTab(ehdr);

    if (names == NULL)
    {
        printf("getSHdrByName Get shstrtabhdr failed name:%s sh_type:%d\n", name, sh_type);
        return NULL;
    }

    /* Walk the section header table from its first entry. */
    for (cur = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff); remaining > 0; remaining--, cur++)
    {
        char * candidate = NULL;

        if (cur->sh_type != sh_type)
        {
            continue;
        }

        candidate = getStrTabStr(ehdr, names, cur->sh_name);
        if ((candidate == NULL) || (strcmp(candidate, name) != 0))
        {
            continue;
        }
        return cur;
    }

    printf("getSHdrByName Get section header failed name:%s sh_type:%d\n", name, sh_type);
    return NULL;
}
/**
 * Print the section header table in a readelf-like layout.
 *
 * @param ehdr  start of the in-memory file image
 * @return      0 on success, -1 when the section-name string table cannot be
 *              located
 */
int parseSectionHeader(Elf32_Ehdr * ehdr)
{
    int i = 0, shnum = ehdr->e_shnum;
    Elf32_Shdr * shdr = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff);
    Elf32_Shdr * shstrtabhdr = NULL;
    const char * text = NULL;

    shstrtabhdr = getSHStrTab(ehdr);
    if (shstrtabhdr == NULL)
    {
        printf("parseSectionHeader Get shstrtabhdr failed\n");
        return -1;
    }

    /* The "0x" prefix announces hex, so the offset is printed with %x. */
    printf("\nThere are %d section headers, starting at offset 0x%x:\n", shnum, (unsigned int)ehdr->e_shoff);
    printf("Section Headers:\n");
    printf(" [Nr] Name Type Addr Off Size ES Flg Lk Inf Al\n");
    for (i = 0; i < shnum; i++, shdr++)
    {
        printf(" [%2d] ", i);
        /* Both lookups may return NULL (bad name offset / unlisted type);
         * never pass NULL to %s. */
        text = getStrTabStr(ehdr, shstrtabhdr, shdr->sh_name);
        printf("%-24s ", (text != NULL) ? text : "<unknown>");
        text = getSectionTypeName(shdr->sh_type);
        printf("%-15s ", (text != NULL) ? text : "<unknown>");
        printf("%08x ", (unsigned int)shdr->sh_addr);
        printf("%06x ", (unsigned int)shdr->sh_offset);
        printf("%06x ", (unsigned int)shdr->sh_size);
        printf("%02x ", (unsigned int)shdr->sh_entsize);
        printf("%03x ", (unsigned int)shdr->sh_flags);
        printf("%02x ", (unsigned int)shdr->sh_link);
        printf("%03x ", (unsigned int)shdr->sh_info);
        printf("%02x ", (unsigned int)shdr->sh_addralign);
        printf("\n");
    }
    return 0;
}
/**
 * Print a symbol table (static or dynamic) in a readelf-like layout.
 *
 * The caller names the symbol section, its section type, and the string-table
 * section that holds the symbol names.
 *
 * @param ehdr     start of the in-memory file image
 * @param symtab   name of the symbol-table section, e.g. ".symtab"
 * @param strtab   name of the string-table section holding the symbol names
 * @param sh_type  section type of the symbol table (SHT_SYMTAB or SHT_DYNSYM)
 * @return         0 on success, -1 when either section cannot be found
 */
int parseSymTab(Elf32_Ehdr * ehdr, char * symtab, char * strtab, Elf32_Word sh_type)
{
    int i = 0, sym_cnt = 0;
    Elf32_Shdr * shdrStrTab = NULL;
    Elf32_Shdr * shdrSymTab = NULL;
    Elf32_Sym * sym = NULL;
    const char * text = NULL;
    /* Elf32_Sym st_info_binding */
    ENUM_S st_info_bind[] =
    {
        {STB_LOCAL, "LOCAL"},
        {STB_GLOBAL, "GLOBAL"},
        {STB_WEAK, "WEAK"},
        {STB_NUM, "NUM"},
        {STB_HIOS, "HIOS"},
        {STB_LOPROC, "LOPROC"},
        {STB_HIPROC, "HIPROC"},
        {INVALID, NULL}
    };
    /* Elf32_Sym st_info_type */
    ENUM_S st_info_type[] =
    {
        {STT_NOTYPE, "NOTYPE"},
        {STT_OBJECT, "OBJECT"},
        {STT_FUNC, "FUNC"},
        {STT_SECTION, "SECTION"},
        {STT_FILE, "FILE"},
        {STT_COMMON, "COMMON"},
        {STT_TLS, "TLS"},
        {STT_NUM, "NUM"},
        {INVALID, NULL}
    };
    /* Elf32_Sym visibility */
    ENUM_S st_other_visibility[] =
    {
        {STV_DEFAULT, "DEFAULT"},
        {STV_INTERNAL, "INTERNAL"},
        {STV_HIDDEN, "HIDDEN"},
        {STV_PROTECTED, "PROTECTED"},
        {INVALID, NULL}
    };

    shdrSymTab = getSHdrByName(ehdr, symtab, sh_type);
    if (shdrSymTab == NULL)
    {
        printf("parseSymTab getSymTab faild\n");
        return -1;
    }
    shdrStrTab = getSHdrByName(ehdr, strtab, SHT_STRTAB);
    if (shdrStrTab == NULL)
    {
        printf("parseSymTab getStrTab faild\n");
        return -1;
    }

    sym_cnt = (int)(shdrSymTab->sh_size / sizeof(Elf32_Sym));
    printf("\nSymbol table '%s' contains %d entries:\n", symtab, sym_cnt);
    printf(" Num: Value Size Type Bind Vis Ndx Name\n");
    sym = (Elf32_Sym *)((char *)ehdr + shdrSymTab->sh_offset);
    for (i = 0; i < sym_cnt; i++, sym++)
    {
        printf("%6d: ", i);
        printf("%08x ", (unsigned int)sym->st_value);
        printf("%5u ", (unsigned int)sym->st_size);
        /* Any lookup below can come back NULL (types, bindings or name
         * offsets not covered above); substitute a placeholder rather than
         * passing NULL to %s. */
        text = enum2str(st_info_type, ELF32_ST_TYPE(sym->st_info));
        printf("%-7s ", (text != NULL) ? text : "<unknown>");
        text = enum2str(st_info_bind, ELF32_ST_BIND(sym->st_info));
        printf("%-6s ", (text != NULL) ? text : "<unknown>");
        text = enum2str(st_other_visibility, ELF32_ST_VISIBILITY(sym->st_other));
        printf("%-8s ", (text != NULL) ? text : "<unknown>");
        printf("%5d ", sym->st_shndx);
        text = getStrTabStr(ehdr, shdrStrTab, sym->st_name);
        printf("%-32s", (text != NULL) ? text : "<unknown>");
        printf("\n");
    }
    return 0;
}
/**
 * Print the program header (segment) table in a readelf-like layout.
 *
 * For a PT_INTERP segment the interpreter path stored at p_offset is printed
 * as well.
 *
 * @param ehdr  start of the in-memory file image
 * @return      always 0
 */
int parseProgramHeader(Elf32_Ehdr * ehdr)
{
    int i = 0, phnum = ehdr->e_phnum;
    Elf32_Phdr * phdr = (Elf32_Phdr *)((char *)ehdr + ehdr->e_phoff);
    const char * text = NULL;
    ENUM_S p_type[] =
    {
        {PT_NULL, "NULL"},
        {PT_LOAD, "LOAD"},
        {PT_DYNAMIC, "DYNAMIC"},
        {PT_INTERP, "INTERP"},
        {PT_NOTE, "NOTE"},
        {PT_SHLIB, "SHLIB"},
        {PT_PHDR, "PHDR"},
        {PT_TLS, "TLS"},
        {PT_GNU_EH_FRAME, "GNU_EH_FRAME"},
        {PT_GNU_STACK, "GNU_STACK"},
        {PT_GNU_RELRO, "GNU_RELRO"},
        {INVALID, NULL},
    };
    ENUM_S p_flags[] =
    {
        {PF_X, "X"},
        {PF_W, "W"},
        {PF_R, "R"},
        {PF_X | PF_W, "WE"},
        {PF_X | PF_R, "RE"},
        {PF_W | PF_R, "RW"},
        {PF_X | PF_W | PF_R, "RWE"},
        {INVALID, NULL},
    };

    printf("\nProgram Headers:\n");
    printf(" Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align\n");
    for (i = 0; i < phnum; i++, phdr++)
    {
        /* Segment types and flag combinations missing from the tables above
         * make enum2str return NULL; print a placeholder instead of handing
         * NULL to %s. */
        text = enum2str(p_type, phdr->p_type);
        printf(" %-12s ", (text != NULL) ? text : "<unknown>");
        printf("0x%08x ", (unsigned int)phdr->p_offset);
        printf("0x%08x ", (unsigned int)phdr->p_vaddr);
        printf("0x%08x ", (unsigned int)phdr->p_paddr);
        printf("0x%08x ", (unsigned int)phdr->p_filesz);
        printf("0x%08x ", (unsigned int)phdr->p_memsz);
        text = enum2str(p_flags, phdr->p_flags);
        printf("%-3s ", (text != NULL) ? text : "?");
        printf("%#04x ", (unsigned int)phdr->p_align);
        printf("\n");
        /* For an interpreter segment, also print the interpreter path. */
        if (phdr->p_type == PT_INTERP)
        {
            printf(" [Requesting program interpreter:%s]\n", (char *)ehdr + phdr->p_offset);
        }
    }
    return 0;
}
/**
 * Print, for every segment, the names of the sections that lie inside it.
 *
 * A section with SHF_ALLOC set is matched by virtual address range against
 * p_vaddr/p_memsz; any other section is matched by file offset range against
 * p_offset/p_filesz. Empty sections are skipped.
 *
 * @param ehdr  start of the in-memory file image
 * @return      0 on success, -1 when the section-name string table cannot be
 *              located
 */
int parseSegment(Elf32_Ehdr * ehdr)
{
    int i = 0, j = 0;
    Elf32_Shdr * shdr = NULL;
    Elf32_Phdr * phdr = (Elf32_Phdr *)((char *)ehdr + ehdr->e_phoff);
    Elf32_Shdr * shstrtabhdr = NULL;
    const char * secName = NULL;

    shstrtabhdr = getSHStrTab(ehdr);
    if (shstrtabhdr == NULL)
    {
        printf("parseSegment Get shstrtabhdr failed\n");
        return -1;
    }

    printf("\nSection to Segment mapping:\n");
    printf(" Segment Sections...\n");
    for (i = 0; i < ehdr->e_phnum; i++, phdr++)
    {
        printf(" %2.2d ", i);
        /* Index 0 is the reserved null section. Start both the index and the
         * pointer at entry 1 so they stay in step and the last section is
         * examined too. */
        shdr = (Elf32_Shdr *)((char *)ehdr + ehdr->e_shoff) + 1;
        for (j = 1; j < ehdr->e_shnum; j++, shdr++)
        {
            if (shdr->sh_size == 0)
            {
                continue;
            }
            /* Range ends are computed in 64 bits so start + size cannot wrap. */
            if (shdr->sh_flags & SHF_ALLOC)
            {
                if ((shdr->sh_addr < phdr->p_vaddr) ||
                    ((unsigned long long)shdr->sh_addr + shdr->sh_size >
                     (unsigned long long)phdr->p_vaddr + phdr->p_memsz))
                {
                    continue;
                }
            }
            else
            {
                if ((shdr->sh_offset < phdr->p_offset) ||
                    ((unsigned long long)shdr->sh_offset + shdr->sh_size >
                     (unsigned long long)phdr->p_offset + phdr->p_filesz))
                {
                    continue;
                }
            }
            secName = getStrTabStr(ehdr, shstrtabhdr, shdr->sh_name);
            printf("%s ", (secName != NULL) ? secName : "<unknown>");
        }
        printf("\n");
    }
    return 0;
}
/**
 * Parse an in-memory ELF image and print its header, section headers, symbol
 * tables, program headers and section-to-segment mapping.
 *
 * Only the header tables are bounds-checked against size; offsets stored
 * inside individual sections are still trusted.
 *
 * @param mem   buffer holding the whole file
 * @param size  number of valid bytes in mem
 * @return      0 when the image was accepted and dumped, -1 when mem is NULL,
 *              the file is too small, is not ELF, is not a 32-bit ELF, or its
 *              header tables lie outside the file
 */
int parseFile(char * mem, int size)
{
    Elf32_Ehdr * ehdr = (Elf32_Ehdr *)mem;
    unsigned long long fileSize = 0;
    unsigned long long shEnd = 0;
    unsigned long long phEnd = 0;

    if ((mem == NULL) || (size < (int)sizeof(Elf32_Ehdr)))
    {
        printf("parseFile: no data or file too small for an ELF header (size:%d)\n", size);
        return -1;
    }

    /* Parse the ELF header; stop if the magic does not match. */
    if (parseHeader(ehdr) != 0)
    {
        return -1;
    }

    /* Everything below reads Elf32_* structures, which do not fit a 64-bit file. */
    if (ehdr->e_ident[EI_CLASS] != ELFCLASS32)
    {
        printf("parseFile: only 32-bit ELF files are supported\n");
        return -1;
    }

    /* Both header tables must lie completely inside the buffer. */
    fileSize = (unsigned long long)size;
    shEnd = (unsigned long long)ehdr->e_shoff + (unsigned long long)ehdr->e_shnum * sizeof(Elf32_Shdr);
    phEnd = (unsigned long long)ehdr->e_phoff + (unsigned long long)ehdr->e_phnum * sizeof(Elf32_Phdr);
    if ((shEnd > fileSize) || (phEnd > fileSize))
    {
        printf("parseFile: header tables extend past the end of the file\n");
        return -1;
    }

    /* Section header table. */
    parseSectionHeader(ehdr);
    /* .symtab and .dynsym; either may be absent (e.g. stripped or static
     * binaries), so a failure here is not fatal. */
    parseSymTab(ehdr, SECTION_NAME_SYMTAB, SECTION_NAME_STRTAB, SHT_SYMTAB);
    parseSymTab(ehdr, SECTION_NAME_DYNSYM, SECTION_NAME_DYNSTR, SHT_DYNSYM);
    /* Program headers and section-to-segment mapping. */
    parseProgramHeader(ehdr);
    parseSegment(ehdr);
    return 0;
}
/**
 * Entry point: load the ELF file named by argv[1] and dump its structure.
 *
 * @return 0 on success, -1 on a usage error, when the file cannot be read,
 *         or when parseFile reports a failure
 */
int main(int argc, char * argv[])
{
    int size = 0;
    int ret = 0;
    char * mem = NULL;

    if (argc < 2)
    {
        printf("./readelf elf\n");
        return -1;
    }

    mem = readFile(argv[1], &size);
    if (mem == NULL)
    {
        /* Nothing was loaded, so there is nothing to parse. */
        return -1;
    }

    ret = parseFile(mem, size);
    free(mem);
    return (ret == 0) ? 0 : -1;
}