jvm开发笔记1---class文件解析器

作者:王智通(阿里云安全工程师)

  笔者最近对java虚拟机产生了浓厚的兴趣, 想了解下最简单的jvm是如何写出来的, 于是看起了《java虚拟机规范》, 这个规范如同intel开发手册一样, 是每个jvm开发人员必须掌握的。要想翻译执行java byte code, 首先得从java class文件中把Code属性解析出来才行。在笔者看来, java的class文件结构着实比elf文件结构复杂很多, 不过再复杂的结构, 只要耐心对照着手册中的结构一一解析即可。经过几天的努力, 用c实现了一个class文件解析器, 目前它只能解析手册中规定的jvm最基本的、必须解析出来的一些属性: Code, StackMapTable, LineNumberTable。当然, 随着开发的深入, 它会不断地健壮起来。

下面说说我在解析java class文件格式中碰到的几个问题, 帮助后面也要自己动手写解析器的朋友少走一点弯路:

1、为了提高解析性能, 使用了mmap将class文件全部映射到内存中, 而不是每次解析都要使用read读磁盘文件。

int mmap_class_file(const char *class_file)
{
        struct stat f_stat;
 
        class_fd = open(class_file, O_RDONLY);
        if (class_fd == -1) {
                perror("open");
                return -1;
        }
 
        if (stat(class_file, &f_stat) == -1) {
                perror("stat");
                close(class_fd);
                return -1;
        }
 
        class_file_len = f_stat.st_size;
        printf("%s file len: %d\n", class_file, class_file_len);
 
        class_start_mem = mmap(NULL, class_file_len, PROT_READ, MAP_PRIVATE, class_fd, 0);
        if (!class_start_mem) {
                perror("mmap");
                close(class_fd);
                return -1;
        }
        printf("mmap %s at %p\n", class_file, class_start_mem);
 
        return 0;
}

 

2、java class使用的是big-endian字节序, x86使用的是little-endian字节序, 所以要转换一下, 就是移位操作而已。

/*
 * Byte-order helpers: class files are big-endian, so multi-byte values
 * loaded on a little-endian host are byte-swapped into `s`.
 *
 * Every parameter is parenthesized so that expression arguments (for
 * example `a | b`) are swapped as a whole. The trailing ';' after
 * `while (0)` is kept on purpose: existing call sites invoke these macros
 * without a terminating semicolon.
 */
#define CLASS_READ_U4(s, p)                             \
        do {                                            \
                (s) = ((((p) >> 24) & 0x000000ff) |     \
                        (((p) >> 8) & 0x0000ff00) |     \
                        (((p) << 24) & 0xff000000) |    \
                        (((p) << 8) & 0x00ff0000));     \
        } while (0);

#define CLASS_READ_U2(s, p)                             \
        do {                                            \
                (s) = ((((p) >> 8) & 0x00ff) |          \
                        (((p) << 8) & 0xff00));         \
        } while (0);

/* Single bytes have no byte order; plain copy. */
#define CLASS_READ_U1(s, p)                             \
        do {                                            \
                (s) = (p);                              \
        } while (0);

 

例如读一个4字节内容:

u4 class_magic;
 
/* read class magic number. */
CLASS_READ_U4(class_magic, (*(u4 *)p_mem))
p_mem = 4;
 
printf("magic: 0x%x\n", class_magic);

 



////////////////////////////////////////////////////////////////////////////////////////////

下面是全部的源码:

jvm.h
 
#ifndef JVM_H
#define JVM_H
 
/* Magic number found in the first four bytes of every class file. */
#define JVM_CLASS_MAGIC                                 0xcafebabe

/*
 * Byte-order helpers: class files are big-endian, so multi-byte values
 * loaded on a little-endian host are byte-swapped into `s`.
 *
 * Every parameter is parenthesized so that expression arguments are
 * handled as a whole. The trailing ';' after `while (0)` is kept on
 * purpose: existing call sites invoke these macros without a terminating
 * semicolon.
 */
#define CLASS_READ_U4(s, p)                             \
        do {                                            \
                (s) = ((((p) >> 24) & 0x000000ff) |     \
                        (((p) >> 8) & 0x0000ff00) |     \
                        (((p) << 24) & 0xff000000) |    \
                        (((p) << 8) & 0x00ff0000));     \
        } while (0);

#define CLASS_READ_U2(s, p)                             \
        do {                                            \
                (s) = ((((p) >> 8) & 0x00ff) |          \
                        (((p) << 8) & 0xff00));         \
        } while (0);

/* Single bytes have no byte order; plain copy. */
#define CLASS_READ_U1(s, p)                             \
        do {                                            \
                (s) = (p);                              \
        } while (0);

/* Copy `len` raw bytes from `p` to `s`; no terminator is appended. */
#define CLASS_READ_STRING(s, p, len)                    \
        do {                                            \
                memcpy((s), (p), (len));                \
        } while (0);
 
/* Unsigned 4-, 2- and 1-byte quantities, named as in the JVM specification. */
typedef unsigned int u4;
typedef unsigned short u2;
typedef unsigned char u1;

/* Constant pool entry tags. */
#define CONSTANT_Class                                  7
#define CONSTANT_Fieldref                               9
#define CONSTANT_Methodref                              10
#define CONSTANT_InterfaceMethodref                     11
#define CONSTANT_String                                 8
#define CONSTANT_Integer                                3
#define CONSTANT_Float                                  4
#define CONSTANT_Long                                   5
#define CONSTANT_Double                                 6
#define CONSTANT_NameAndType                            12
#define CONSTANT_Utf8                                   1
#define CONSTANT_MethodHandle                           15
#define CONSTANT_MethodType                             16
#define CONSTANT_InvokeDynamic                          18

/* Class access and property flags. */
#define ACC_PUBLIC                                      0x0001
#define ACC_FINAL                                       0x0010
#define ACC_SUPER                                       0x0020
#define ACC_INTERFACE                                   0x0200
#define ACC_ABSTRACT                                    0x0400
#define ACC_SYNTHETIC                                   0x1000
#define ACC_ANNOTATION                                  0x2000
#define ACC_ENUM                                        0x4000

/* Method access and property flags. */
#define METHOD_ACC_PUBLIC                               0x0001
#define METHOD_ACC_PRIVATE                              0x0002
#define METHOD_ACC_PROTECTED                            0x0004
#define METHOD_ACC_STATIC                               0x0008
#define METHOD_ACC_FINAL                                0x0010
#define METHOD_ACC_SYNCHRONIZED                         0x0020
/* Misspelled legacy name, kept so existing users keep compiling. */
#define METHOD_ACC_SYNCHRONIED                          METHOD_ACC_SYNCHRONIZED
#define METHOD_ACC_BRIDGE                               0x0040
#define METHOD_ACC_VARARGS                              0x0080
#define METHOD_ACC_NATIVE                               0x0100
#define METHOD_ACC_ABSTRACT                             0x0400
#define METHOD_ACC_STRICT                               0x0800
#define METHOD_ACC_SYNTHETIC                            0x1000

/* verification_type_info tags used inside StackMapTable frames. */
#define ITEM_Top                                        0
#define ITEM_Integer                                    1
#define ITEM_Float                                      2
#define ITEM_Double                                     3
#define ITEM_Long                                       4
#define ITEM_Null                                       5
#define ITEM_UninitializedThis                          6
#define ITEM_Object                                     7
#define ITEM_Uninitialized                              8
 
/*
 * In-memory record for one constant pool slot. classreader.c fills it only
 * for CONSTANT_Utf8 entries: `index` is the pool index and `base` points to
 * a heap copy of the string bytes with a '\0' appended.
 * NOTE(review): the struct is packed, which places the pointer member at
 * offset 2 -- confirm that a misaligned pointer field is acceptable on every
 * target platform.
 */
struct constant_info_st {
        u2 index;
        u1 *base;
}__attribute__ ((packed));

/* Generic constant pool entry: a one-byte tag followed by tag-specific data. */
struct cp_info {
        u1 tag;
        u1 info[];
}__attribute__ ((packed));

/*
 * The structs below describe the body of each constant pool entry. The tag
 * member is commented out in all of them: classreader.c reads the tag byte
 * on its own before reading the remaining fields.
 */

/* Body of a CONSTANT_Class entry. */
struct CONSTANT_Class_info {
        //u1 tag;
        u2 name_index;
}__attribute__ ((packed));

/* Body of a CONSTANT_Fieldref entry. */
struct CONSTANT_Fieldref_info {
        //u1 tag;
        u2 class_index;
        u2 name_and_type_index;
}__attribute__ ((packed));

/* Body of a CONSTANT_Methodref entry. */
struct CONSTANT_Methodref_info {
        //u1 tag;
        u2 class_index;
        u2 name_and_type_index;
}__attribute__ ((packed));

/*
 * Body of a CONSTANT_InterfaceMethodref entry.
 * NOTE(review): `name_and_type_inex` looks like a misspelling of
 * `name_and_type_index`; it is left alone because it is part of the
 * struct's interface.
 */
struct CONSTANT_InterfaceMethodref_info {
        //u1 tag;
        u2 class_index;
        u2 name_and_type_inex;
}__attribute__ ((packed));

/* Body of a CONSTANT_String entry. */
struct CONSTANT_String_info {
        //u1 tag;
        u2 string_index;
}__attribute__ ((packed));

/* Body of a CONSTANT_Integer entry. */
struct CONSTANT_Integer_info {
        //u1 tag;
        u4 bytes;
}__attribute__ ((packed));

/* Body of a CONSTANT_Float entry. */
struct CONSTANT_Float_info {
        //u1 tag;
        u4 bytes;
}__attribute__ ((packed));

/* Body of a CONSTANT_Long entry: two 4-byte halves. */
struct CONSTANT_Long_info {
        //u1 tag;
        u4 high_bytes;
        u4 low_bytes;
}__attribute__ ((packed));

/* Body of a CONSTANT_Double entry: two 4-byte halves. */
struct CONSTANT_Double_info {
        //u1 tag;
        u4 high_bytes;
        u4 low_bytes;
}__attribute__ ((packed));

/* Body of a CONSTANT_NameAndType entry. */
struct CONSTANT_NameAndType_info {
        //u1 tag;
        u2 name_index;
        u2 descriptor_index;
}__attribute__ ((packed));

/* Body of a CONSTANT_Utf8 entry: a 2-byte length followed by that many bytes. */
struct CONSTANT_Utf8_info {
        //u1 tag;
        u2 length;
        u1 bytes[];
}__attribute__ ((packed));

/* Body of a CONSTANT_MethodHandle entry. */
struct CONSTANT_MethodHandle_info {
        //u1 tag;
        u1 reference_kind;
        u2 reference_index;
}__attribute__ ((packed));

/* Body of a CONSTANT_MethodType entry. */
struct CONSTANT_MethodType_info {
        //u1 tag;
        u2 descriptor_index;
}__attribute__ ((packed));

/* Body of a CONSTANT_InvokeDynamic entry. */
struct CONSTANT_InvokeDynamic_info {
        //u1 tag;
        u2 bootstrap_method_attr_index;
        u2 name_and_type_index;
}__attribute__ ((packed));
 
#endif
 
////////////////////////////////////////////////////////////////////////
 
classreader.c:
 
/*
 * classreader.c - jvm class file parser.
 *
 * (c) wzt 2012         http://www.cloud-sec.org
 *
 */
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
 
#include "jvm.h"
 
/* Descriptor of the class file; opened by mmap_class_file(), closed by mmap_exit(). */
static int class_fd;
/* Size of the class file in bytes, taken from st_size; also the mapping length. */
static int class_file_len;
/* Start address of the read-only mapping of the class file. */
static void *class_start_mem;
/* Read cursor into the mapping; each parse_* function reads at and moves it. */
static char *p_mem;
/* Table indexed by constant pool index; allocated by parse_class_constant(). */
static struct constant_info_st *constant_info;
 
int mmap_class_file(const char *class_file)
{
        struct stat f_stat;
 
        class_fd = open(class_file, O_RDONLY);
        if (class_fd == -1) {
                perror("open");
                return -1;
        }
 
        if (stat(class_file, &f_stat) == -1) {
                perror("stat");
                close(class_fd);
                return -1;
        }
 
        class_file_len = f_stat.st_size;
        printf("%s file len: %d\n", class_file, class_file_len);
 
        class_start_mem = mmap(NULL, class_file_len, PROT_READ, MAP_PRIVATE, class_fd, 0);
        if (!class_start_mem) {
                perror("mmap");
                close(class_fd);
                return -1;
        }
        printf("mmap %s at %p\n", class_file, class_start_mem);
 
        return 0;
}
 
/*
 * mmap_exit - release the mapping and the descriptor set up by
 * mmap_class_file().
 *
 * The descriptor is closed even when munmap() fails so it cannot leak.
 * Returns 0 on success, -1 if munmap() failed.
 */
int mmap_exit(void)
{
        int ret = 0;

        if (munmap(class_start_mem, class_file_len) == -1) {
                perror("munmap");
                ret = -1;
        }

        close(class_fd);
        return ret;
}
 
int parse_class_magic(void)
{
        u4 class_magic;
 
        /* read class magic number. */
        CLASS_READ_U4(class_magic, (*(u4 *)p_mem))
        p_mem = 4;
 
        printf("magic: 0x%x\n", class_magic);
        if (class_magic != JVM_CLASS_MAGIC) {
                printf("jvm class magic not match.\n");
                return -1;
        }
        printf("jvm class magic match: 0x%x\n", class_magic);
        return 0;
}
 
int parse_class_version(void)
{
        u2 minor_version, major_version;
        u2 constant_pool_count;
 
        /* read class minor_version. */
        CLASS_READ_U2(minor_version, (*(u2 *)p_mem))
        p_mem = 2;
        printf("jvm class minor_version: %d\n", minor_version);
 
        /* read class major_version. */
        CLASS_READ_U2(major_version, (*(u2 *)p_mem))
        p_mem = 2;
        printf("jvm class major_version: %d\n", major_version);
 
        return 0;
}
 
int parse_class_constant(void)
{
        u2 constant_pool_count;
        u1 constant_tag;
        u2 idx;
 
        printf("\n-----------parse contant pool count----------------------:\n\n");
        /* read constant_pool_count */
        CLASS_READ_U2(constant_pool_count, (*(u2 *)p_mem))
        p_mem = 2;
        printf("jvm constant_pool_count: %d\n", constant_pool_count);
 
        constant_info = (struct constant_info_st *)
                        malloc(sizeof(struct constant_info_st) *
                                constant_pool_count);
        if (!constant_info) {
                printf("Malloc failed.\n");
                return -1;
        }
 
        for (idx = 1; idx <= constant_pool_count - 1; idx ) {
                CLASS_READ_U1(constant_tag, (*(u1 *)p_mem))
                p_mem = 1;
                printf("- idx: - constant tag: %d\t", idx, (int)constant_tag);
                switch (constant_tag) {
                case CONSTANT_Fieldref:
                case CONSTANT_Methodref:
                case CONSTANT_InterfaceMethodref:
                {
                        struct CONSTANT_Methodref_info methodref_info;
 
                        CLASS_READ_U2(methodref_info.class_index, (*(u2 *)p_mem));
                        p_mem = 2;
                        assert(methodref_info.class_index > 0 &&
                                methodref_info.class_index < constant_pool_count);
 
                        CLASS_READ_U2(methodref_info.name_and_type_index, (*(u2 *)p_mem));
                        p_mem = 2;
                        assert(methodref_info.class_index > 0 &&
                                methodref_info.class_index < constant_pool_count);
 
                        printf("class_index: %d, name_and_type_index: %d\n",
                                methodref_info.class_index,
                                methodref_info.name_and_type_index);
                        break;
                }
                case CONSTANT_Class:
                {
                        struct CONSTANT_Class_info class_info;
 
                        CLASS_READ_U2(class_info.name_index, (*(u2 *)p_mem));
                        p_mem = 2;
                        assert(class_info.name_index > 0 &&
                                class_info.name_index < constant_pool_count);
                        printf("name_index: %d\n", class_info.name_index);
                        break;
                }
                case CONSTANT_String:
                {
                        struct CONSTANT_String_info string_info;
 
                        CLASS_READ_U2(string_info.string_index, (*(u2 *)p_mem));
                        p_mem = 2;
                        assert(string_info.string_index > 0 &&
                                string_info.string_index < constant_pool_count);
                        printf("string index: %d\n", string_info.string_index);
                        break;
                }
                case CONSTANT_Long:
                {
                        struct CONSTANT_Long_info long_info;
 
                        CLASS_READ_U2(long_info.high_bytes, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        CLASS_READ_U2(long_info.low_bytes, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        printf("high bytes: %d, low bytes: %d\n",
                                long_info.high_bytes, long_info.low_bytes);
                        break;
                }
                case CONSTANT_Integer:
                {
                        struct CONSTANT_Integer_info integer_info;
 
                        CLASS_READ_U4(integer_info.bytes, (*(u4 *)p_mem));
                        p_mem = 4;
                        printf("bytes: %d\n", integer_info.bytes);
                        break;
                }
                case CONSTANT_Float:
                {
                        struct CONSTANT_Float_info float_info;
 
                        CLASS_READ_U4(float_info.bytes, (*(u4 *)p_mem));
                        p_mem = 4;
                        printf("bytes: %d\n", float_info.bytes);
                        break;
                }
                case CONSTANT_Double:
                {
                        struct CONSTANT_Double_info double_info;
 
                        CLASS_READ_U4(double_info.high_bytes, (*(u4 *)p_mem));
                        p_mem = 4;
 
                        CLASS_READ_U4(double_info.low_bytes, (*(u4 *)p_mem));
                        p_mem = 4;
                        printf("high_bytes: %d, low_bytes: %d\n",
                                double_info.high_bytes, double_info.low_bytes);
                        break;
                }
                case CONSTANT_NameAndType:
                {
                        struct CONSTANT_NameAndType_info name_type_info;
 
                        CLASS_READ_U2(name_type_info.name_index, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        CLASS_READ_U2(name_type_info.descriptor_index, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        printf("name_index: %d, descriptor_index: %d\n",
                                name_type_info.name_index, name_type_info.descriptor_index);
                        break;
                }
                case CONSTANT_MethodHandle:
                {
                        struct CONSTANT_MethodHandle_info method_handle_info;
 
                        CLASS_READ_U1(method_handle_info.reference_kind, (*(u1 *)p_mem));
                        p_mem = 1;
 
                        CLASS_READ_U2(method_handle_info.reference_index, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        printf("reference_kind: %d, reference_index: %d\n",
                                method_handle_info.reference_kind,
                                method_handle_info.reference_index);
                        break;
                }
                case CONSTANT_MethodType:
                {
                        struct CONSTANT_MethodType_info method_type_info;
 
                        CLASS_READ_U2(method_type_info.descriptor_index, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        printf("descriptor_index %d\n", method_type_info.descriptor_index);
                        break;
                }
                case CONSTANT_InvokeDynamic:
                {
                        struct CONSTANT_InvokeDynamic_info invoke_dyc_info;
 
                        CLASS_READ_U2(invoke_dyc_info.bootstrap_method_attr_index, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        CLASS_READ_U2(invoke_dyc_info.name_and_type_index, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        printf("bootstrap_method_attr_index: %d, name_and_type_index: %d\n",
                                invoke_dyc_info.bootstrap_method_attr_index,
                                invoke_dyc_info.name_and_type_index);
                        break;
                }
                case CONSTANT_Utf8:
                {
                        u2 len;
                        char *buf;
 
                        CLASS_READ_U2(len, (*(u2 *)p_mem));
                        p_mem = 2;
 
                        buf = malloc(len 1);
                        buf[len] = '\0';
                        assert(buf != NULL);
 
                        memcpy(buf, p_mem, len);
                        printf("len: %d\t%s\n", len, buf);
                        p_mem = len;
 
                        constant_info[idx].index = idx;
                        constant_info[idx].base = buf;
                        break;
                }
                default:
                        ;
                }
        }
        printf("\n");
/*
        for (idx = 1; idx <= constant_pool_count - 1; idx )
                printf("%d: %s\n", constant_info[idx].index, constant_info[idx].base);
*/
        return 0;
 
out:
        mmap_exit();
        return -1;
}
 
int parse_class_access_flag(void)
{
        u2 access_flag;
 
        /* read class access flag. */
        CLASS_READ_U2(access_flag, (*(u2 *)p_mem))
        p_mem = 2;
 
        printf("access_flag: 0x%x\n", access_flag);
        return 0;
}
int parse_class_this_super(void)
{
        u2 this_class;
        u2 super_class;
 
        CLASS_READ_U2(this_class, (*(u2 *)p_mem))
        p_mem = 2;
 
        CLASS_READ_U2(super_class, (*(u2 *)p_mem))
        p_mem = 2;
 
        printf("this_class: %d\tsuper_class: %d\n\n", this_class, super_class);
        return 0;
}
 
int parse_class_interface(void)
{
        u2 interfaces_count;
        u2 idx, index;
 
        CLASS_READ_U2(interfaces_count, (*(u2 *)p_mem))
        p_mem = 2;
        printf("interfaces_count: %d\n", interfaces_count);
 
        for (idx = 0; idx < interfaces_count; idx ) {
                CLASS_READ_U2(index, (*(u2 *)p_mem));
                p_mem = 2;
                printf("index: %d\n", index);
        }
 
        return 0;
}
 
int parse_class_filed(void)
{
        u2 fileds_count;
        u2 idx;
 
        CLASS_READ_U2(fileds_count, (*(u2 *)p_mem))
        p_mem = 2;
        printf("filed_count: %d\n", fileds_count);
 
        return 0;
}
int __parse_exception_table(int len)
{
        u2 start_pc, end_pc;
        u2 handler_pc, catch_type;
        u2 idx;
 
        for (idx = 0; idx < len; idx ) {
                CLASS_READ_U2(start_pc, (*(u2 *)p_mem))
                p_mem = 2;
                printf("start_pc: %d\n", start_pc);
 
                CLASS_READ_U2(end_pc, (*(u2 *)p_mem))
                p_mem = 2;
                printf("end_pc: %d\n", end_pc);
 
                CLASS_READ_U2(handler_pc, (*(u2 *)p_mem))
                p_mem = 2;
                printf("handler_pc: %d\n", handler_pc);
 
                CLASS_READ_U2(catch_type, (*(u2 *)p_mem))
                p_mem = 2;
                printf("catch_type: %d\n", catch_type);
        }
 
        return 0;
}
 
int __parse_line_number_table(void)
{
        u4 attribute_length;
        u2 line_number_table_length;
        u2 start_pc, line_number;
        u2 idx;
 
        CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
        p_mem = 4;
        printf("\t\tattribute_length: %d\n", attribute_length);
 
        CLASS_READ_U2(line_number_table_length, (*(u2 *)p_mem))
        p_mem = 2;
        printf("\t\tline_number_table_length: %d\n", line_number_table_length);
 
        for (idx = 0; idx < line_number_table_length; idx ) {
                CLASS_READ_U2(start_pc, (*(u2 *)p_mem))
                p_mem = 2;
                printf("\t\tstart_pc: %d\n", start_pc);
 
                CLASS_READ_U2(line_number, (*(u2 *)p_mem))
                p_mem = 2;
                printf("\t\tline_number: %d\n", line_number);
        }
 
        return 0;
}
 
/*
 * __parse_verification_type_info - read and print `number` consecutive
 * verification_type_info items at the cursor.
 *
 * Every item starts with a 1-byte tag. ITEM_Object and ITEM_Uninitialized
 * carry an additional 2-byte value (cpool_index / offset); all other tags
 * have no payload.
 *
 * Returns 0 on success, -1 as soon as an unknown tag is met.
 */
int __parse_verification_type_info(u1 number)
{
        u1 idx, tag;

        for (idx = 0; idx < number; idx++) {
                CLASS_READ_U1(tag, (*(u1 *)p_mem));
                p_mem += 1;
                printf("\t\ttag: %d\n", tag);
                switch (tag) {
                case ITEM_Top:
                        printf("\t\tITEM_Top.\n");
                        break;
                case ITEM_Integer:
                        printf("\t\tITEM_Integer.\n");
                        break;
                case ITEM_Float:
                        printf("\t\tITEM_float.\n");
                        break;
                case ITEM_Double:
                        printf("\t\tITEM_Double.\n");
                        break;
                case ITEM_Long:
                        printf("\t\tITEM_Long.\n");
                        break;
                case ITEM_Null:
                        printf("\t\tITEM_Null.\n");
                        break;
                case ITEM_UninitializedThis:
                        printf("\t\tITEM_UninitializedThis.\n");
                        break;
                case ITEM_Object:
                {
                        u2 cpool_index;

                        printf("\t\tITEM_Object.\n");
                        CLASS_READ_U2(cpool_index, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\tcpool_index: %d\n", cpool_index);
                        break;
                }
                case ITEM_Uninitialized:
                {
                        u2 offset;

                        printf("\t\tITEM_Uninitialized.\n");
                        CLASS_READ_U2(offset, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\toffset: %d\n", offset);
                        break;
                }
                default:
                        return -1;
                }
        }

        return 0;
}
 
/*
 * Parse `count` verification_type_info items. __parse_verification_type_info()
 * takes a u1 count, so larger runs are fed to it in chunks of at most 255.
 */
static int parse_verification_type_run(u2 count)
{
        while (count > 0) {
                u1 chunk = count > 255 ? 255 : (u1)count;

                if (__parse_verification_type_info(chunk) == -1)
                        return -1;
                count -= chunk;
        }

        return 0;
}

/*
 * __parse_stack_map_frame - read and print `number` stack_map_frame entries
 * of a StackMapTable attribute.
 *
 * frame_type selects the layout:
 *   0-63     same_frame                           (offset_delta = type)
 *   64-127   same_locals_1_stack_item_frame       (offset_delta = type - 64)
 *   128-246  reserved
 *   247      same_locals_1_stack_item_frame_extended
 *   248-250  chop_frame
 *   251      same_frame_extended
 *   252-254  append_frame                         (type - 251 extra locals)
 *   255      full_frame
 *
 * Returns 0 on success, -1 on a reserved frame type or an unknown
 * verification type tag.
 */
int __parse_stack_map_frame(u2 number)
{
        u1 frame_type;
        u2 offset_delta;        /* explicit offsets are two bytes wide */
        u2 idx;

        for (idx = 0; idx < number; idx++) {
                CLASS_READ_U1(frame_type, (*(u1 *)p_mem));
                p_mem += 1;
                printf("\t\tframe_type: %d\n", frame_type);

                if (frame_type <= 63) {
                        offset_delta = frame_type;
                        printf("\t\tsame_frame\toffset_delta: %d\n", offset_delta);
                } else if (frame_type <= 127) {
                        offset_delta = frame_type - 64;
                        printf("\t\tsame_locals_l_stack_item_frame\toffset_delta: %d\n",
                                offset_delta);
                        if (parse_verification_type_run(1) == -1)
                                return -1;
                } else if (frame_type < 247) {
                        /* Layout unknown: the cursor cannot be advanced safely. */
                        printf("\t\treserved frame_type.\n");
                        return -1;
                } else if (frame_type == 247) {
                        CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\tsame_locals_l_stack_item_frame_extended\toffset_delta: %d\n",
                                offset_delta);
                        if (parse_verification_type_run(1) == -1)
                                return -1;
                } else if (frame_type <= 250) {
                        CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\tchop_frame\toffset_delta: %d\n", offset_delta);
                } else if (frame_type == 251) {
                        CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\tsame_frame_extended\toffset_delta: %d\n", offset_delta);
                } else if (frame_type <= 254) {
                        u1 locals_num;

                        CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\tappend_frame\toffset_delta: %d\n", offset_delta);

                        locals_num = frame_type - 251;
                        printf("\t\tlocals_num: %d\n", locals_num);

                        if (parse_verification_type_run(locals_num) == -1)
                                return -1;
                } else {
                        u2 number_of_locals;
                        u2 number_of_stack_items;

                        CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\tfull_frame\toffset_delta: %d\n", offset_delta);

                        CLASS_READ_U2(number_of_locals, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\tnumber_of_locals: %d\n", number_of_locals);
                        if (parse_verification_type_run(number_of_locals) == -1)
                                return -1;

                        CLASS_READ_U2(number_of_stack_items, (*(u2 *)p_mem));
                        p_mem += 2;
                        printf("\t\tnumber_of_stack_items: %d\n", number_of_stack_items);
                        if (parse_verification_type_run(number_of_stack_items) == -1)
                                return -1;
                }
        }

        return 0;
}
int __parse_stack_map_table(void)
{
        u4 attribute_length;
        u2 number_of_entries;
        u2 idx;
 
        CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
        p_mem = 4;
        printf("\t\tattribute_length: %d\n", attribute_length);
 
        CLASS_READ_U2(number_of_entries, (*(u2 *)p_mem))
        p_mem = 2;
        printf("\t\tnumber_of_entries: %d\n", number_of_entries);
 
        __parse_stack_map_frame(number_of_entries);
 
        return 0;
}
/* attribute_name_index has been parsed before. */
int parse_code_attribute(void)
{
        u2 attribute_name_index;
        u4 attribute_length;
        u2 max_stack;
        u2 max_locals;
        u4 code_length;
        u1 *code;
        u2 exception_table_length;
        u2 attributes_count;
        u2 idx;
 
        CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
        p_mem = 4;
        printf("\tattribute_length: %d\n", attribute_length);
 
        CLASS_READ_U2(max_stack, (*(u2 *)p_mem))
        p_mem = 2;
        printf("\tmax_stack: %d\n", max_stack);
 
        CLASS_READ_U2(max_locals, (*(u2 *)p_mem))
        p_mem = 2;
        printf("\tmax_locals: %d\n", max_locals);
 
        CLASS_READ_U4(code_length, (*(u4 *)p_mem))
        p_mem = 4;
        printf("\tcode_length: %d\n", code_length);
 
        code = (u1 *)malloc(code_length 1);
        if (!code) {
                printf("Malloc failed.\n");
                return -1;
        }
        memcpy(code, p_mem, code_length);
        code[code_length] = '\0';
        p_mem = code_length;
 
        CLASS_READ_U2(exception_table_length, (*(u2 *)p_mem))
        p_mem = 2;
        printf("\texception_table_length: %d\n", exception_table_length);
 
        __parse_exception_table(exception_table_length);
 
        CLASS_READ_U2(attributes_count, (*(u2 *)p_mem))
        p_mem = 2;
        printf("\tattributes_count: %d\n", attributes_count);
 
        /* parse attributes */
        for (idx = 0; idx < attributes_count; idx ) {
                CLASS_READ_U2(attribute_name_index, (*(u2 *)p_mem))
                p_mem = 2;
                printf("\tidx: %d attribute_name_index: %d", idx 1, attribute_name_index);
 
                if (!strcmp(constant_info[attribute_name_index].base, "LineNumberTable")) {
                        printf("\n\tparse LineNumberTable:\n");
                        __parse_line_number_table();
                }
                if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) {
                        printf("\n\tparse StackMapTable:\n");
                        __parse_stack_map_table();
                }
                if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTable")) {
                        ;
                }
                if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTypeTable")) {
                        ;
                }
                if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) {
                        ;
                }
        }
 
        return 0;
}
 
int parse_class_method(void)
{
        u2 method_count;
        u2 access_flags, name_index;
        u2 descriptor_index, attributes_count;
        u2 idx;
 
        printf("\n---------------parse class method-------------------------:\n\n");
        CLASS_READ_U2(method_count, (*(u2 *)p_mem))
        p_mem = 2;
        printf("method_count: %d\n", method_count);
 
        for (idx = 0; idx < method_count; idx ) {
                CLASS_READ_U2(access_flags, (*(u2 *)p_mem))
                p_mem = 2;
                printf("access_flags: 0x%x\n", access_flags);
 
                CLASS_READ_U2(name_index, (*(u2 *)p_mem))
                p_mem = 2;
                printf("name_index: %d\n", name_index);
 
                CLASS_READ_U2(descriptor_index, (*(u2 *)p_mem))
                p_mem = 2;
                printf("descriptor_index: %d\n", descriptor_index);
 
                CLASS_READ_U2(attributes_count, (*(u2 *)p_mem))
                p_mem = 2;
                printf("attributes_count: %d\n\n", attributes_count);
 
                /* parse attributes */
                CLASS_READ_U2(name_index, (*(u2 *)p_mem))
                p_mem = 2;
                printf("attritbutes name_index: %d\n", name_index);
 
                if (!strcmp(constant_info[name_index].base, "Code")) {
                        printf("parse code attribute:\n");
                        parse_code_attribute();
                }
                if (!strcmp(constant_info[name_index].base, "Exceptions")) {
                        ;
                }
                if (!strcmp(constant_info[name_index].base, "Signature")) {
                        ;
                }
        }
 
        return 0;
}
 
int jvm_parse_class_file(const char *class_file)
{
        assert(class_file != NULL);
        if (mmap_class_file(class_file) == -1)
                return -1;
 
        p_mem = class_start_mem;
        if (parse_class_magic() == -1)
                goto out;
 
        if (parse_class_version() == -1)
                goto out;
 
        if (parse_class_constant() == -1)
                goto out;
 
        if (parse_class_access_flag() == -1)
                goto out;
 
        if (parse_class_this_super() == -1)
                goto out;
 
        if (parse_class_interface() == -1)
                goto out;
 
        if (parse_class_filed() == -1)
                goto out;
 
        if (parse_class_method() == -1)
                goto out;
 
        mmap_exit();
        return 0;
out:
        mmap_exit();
        return -1;
}
 
/*
 * jvm_usage - print a one-line usage message to stdout.
 * `proc` is the program name shown in the message.
 */
void jvm_usage(const char *proc)
{
        /* Name the expected argument so the message is actually usable. */
        fprintf(stdout, "usage: %s <class file>\n", proc);
}
 
/*
 * main - parse the class file named by the first command-line argument.
 *
 * Exit status is 0 when the file was parsed successfully and 1 when the
 * argument is missing or parsing failed, so callers and scripts can detect
 * errors.
 */
int main(int argc, char **argv)
{
        if (argc < 2) {
                jvm_usage(argv[0]);
                return 1;
        }

        if (jvm_parse_class_file(argv[1]) == -1)
                return 1;

        return 0;
}
 
 
////////////////////////////////////////////////////////////////////////////
[email protected] # gcc -o classreader classreader.c -w
[email protected] # ./classreader test.class
test.class file len: 462
mmap test.class at 0x2b0b78fa5000
magic: 0xcafebabe
jvm class magic match: 0xcafebabe
jvm class minor_version: 0
jvm class major_version: 50
 
-----------parse contant pool count----------------------:
 
jvm constant_pool_count: 30
- idx:  1 constant tag: 10      class_index: 6, name_and_type_index: 16
- idx:  2 constant tag: 9       class_index: 17, name_and_type_index: 18
- idx:  3 constant tag: 8       string index: 19
- idx:  4 constant tag: 10      class_index: 20, name_and_type_index: 21
- idx:  5 constant tag: 7       name_index: 22
- idx:  6 constant tag: 7       name_index: 23
- idx:  7 constant tag: 1       len: 6  
- idx:  8 constant tag: 1       len: 3  ()V
- idx:  9 constant tag: 1       len: 4  Code
- idx: 10 constant tag: 1       len: 15 LineNumberTable
- idx: 11 constant tag: 1       len: 4  main
- idx: 12 constant tag: 1       len: 22 ([Ljava/lang/String;)V
- idx: 13 constant tag: 1       len: 13 StackMapTable
- idx: 14 constant tag: 1       len: 10 SourceFile
- idx: 15 constant tag: 1       len: 9  test.java
- idx: 16 constant tag: 12      name_index: 7, descriptor_index: 8
- idx: 17 constant tag: 7       name_index: 24
- idx: 18 constant tag: 12      name_index: 25, descriptor_index: 26
- idx: 19 constant tag: 1       len: 4  hehe
- idx: 20 constant tag: 7       name_index: 27
- idx: 21 constant tag: 12      name_index: 28, descriptor_index: 29
- idx: 22 constant tag: 1       len: 4  test
- idx: 23 constant tag: 1       len: 16 java/lang/Object
- idx: 24 constant tag: 1       len: 16 java/lang/System
- idx: 25 constant tag: 1       len: 3  out
- idx: 26 constant tag: 1       len: 21 Ljava/io/PrintStream;
- idx: 27 constant tag: 1       len: 19 java/io/PrintStream
- idx: 28 constant tag: 1       len: 7  println
- idx: 29 constant tag: 1       len: 21 (Ljava/lang/String;)V
 
access_flag: 0x21
this_class: 5   super_class: 6
 
interfaces_count: 0
filed_count: 0
 
---------------parse class method-------------------------:
 
method_count: 2
access_flags: 0x1
name_index: 7
descriptor_index: 8
attributes_count: 1
 
attritbutes name_index: 9
parse code attribute:
        attribute_length: 29
        max_stack: 1
        max_locals: 1
        code_length: 5
        exception_table_length: 0
        attributes_count: 1
        idx: 1 attribute_name_index: 10
        parse LineNumberTable:
                attribute_length: 6
                line_number_table_length: 1
                start_pc: 0
                line_number: 5
access_flags: 0x9
name_index: 11
descriptor_index: 12
attributes_count: 1
 
attritbutes name_index: 9
parse code attribute:
        attribute_length: 77
        max_stack: 2
        max_locals: 2
        code_length: 24
        exception_table_length: 0
        attributes_count: 2
        idx: 1 attribute_name_index: 10
        parse LineNumberTable:
                attribute_length: 22
                line_number_table_length: 5
                start_pc: 0
                line_number: 7
                start_pc: 2
                line_number: 9
                start_pc: 9
                line_number: 10
                start_pc: 17
                line_number: 9
                start_pc: 23
                line_number: 11
        idx: 2 attribute_name_index: 13
        parse StackMapTable:
                attribute_length: 7
                number_of_entries: 2
                frame_type: 252
                append_frame    offset_delta: 4
                locals_num: 1
                tag: 1
                ITEM_Integer.
                frame_type: 18
                same_frame      offset_delta: 18
[email protected] #



你可能感兴趣的:(jvm开发笔记1---class文件解析器)