笔者最近对java虚拟机产生了浓厚的兴趣, 想了解下最简单的jvm是如何写出来的,于是看起了《java虚拟机规范》,这个规范如同intel开发手册一样,是每个jvm开发人员必须掌握的。 要想翻译执行java byte code, 首先得从java class文件中把Code属性解析出来才行。 在笔者看来, java的class文件结构着实比elf文件结构复杂很多,不过再复杂的结构, 只要耐心对照着手册中的结构一一解析即可。经过几天的努力, 用c实现了一个class文件解析器,目前它只能解析手册中规定的jvm最基本的要解析出来的一些属性:Code, StackMapTable, LineNumberTable。当然, 随着开发的深入, 它会不断地健壮起来。
下面说说我在解析java class文件格式中碰到的几个问题, 帮助后面也要自己动手写解析器的朋友少走一点弯路:
1、为了提高解析性能, 使用了mmap将class文件全部映射到内存中, 而不是每次解析都要使用read读磁盘文件。
/*
 * mmap_class_file - map a whole class file read-only into memory.
 * @class_file: path of the .class file to open.
 *
 * On success the descriptor, the file length and the mapping address are
 * left in the file-scope variables class_fd, class_file_len and
 * class_start_mem.
 *
 * Returns 0 on success, -1 on failure (the descriptor is closed again).
 */
int mmap_class_file(const char *class_file)
{
    struct stat f_stat;

    class_fd = open(class_file, O_RDONLY);
    if (class_fd == -1) {
        perror("open");
        return -1;
    }

    /* fstat() describes the file that was actually opened, not whatever
     * the path happens to name by now. */
    if (fstat(class_fd, &f_stat) == -1) {
        perror("fstat");
        close(class_fd);
        return -1;
    }

    class_file_len = f_stat.st_size;
    printf("%s file len: %d\n", class_file, class_file_len);

    class_start_mem = mmap(NULL, class_file_len, PROT_READ,
                           MAP_PRIVATE, class_fd, 0);
    /* mmap() reports failure with MAP_FAILED, never with NULL. */
    if (class_start_mem == MAP_FAILED) {
        perror("mmap");
        class_start_mem = NULL;
        close(class_fd);
        return -1;
    }
    printf("mmap %s at %p\n", class_file, class_start_mem);

    return 0;
}
2、java class使用的是big-endian字节序,x86使用的是little-endian字节序, 所以要转换一下,就是移位操作而已。
/*
 * Byte-order helpers: store in `s` the value `p` with its bytes reversed
 * (class files are big-endian, x86 is little-endian).
 *
 * Every argument is parenthesized so that an expression such as `a | b`
 * can be passed safely.  The trailing ';' after `while (0)` is kept on
 * purpose: existing call sites invoke these macros without a semicolon.
 */
#define CLASS_READ_U4(s, p) \
    do { \
        (s) = ((((p) >> 24) & 0x000000ff) | \
               (((p) >> 8) & 0x0000ff00) | \
               (((p) << 24) & 0xff000000) | \
               (((p) << 8) & 0x00ff0000)); \
    } while (0);

#define CLASS_READ_U2(s, p) \
    do { \
        (s) = ((((p) >> 8) & 0x00ff) | \
               (((p) << 8) & 0xff00)); \
    } while (0);

/* A single byte has no byte order; plain copy. */
#define CLASS_READ_U1(s, p) \
    do { \
        (s) = (p); \
    } while (0);
例如读一个4字节内容:
u4 class_magic; /* read class magic number. */ CLASS_READ_U4(class_magic, (*(u4 *)p_mem)) p_mem = 4; printf("magic: 0x%x\n", class_magic);
////////////////////////////////////////////////////////////////////////////////////////////
下面是全部的源码:
jvm.h #ifndef JVM_H #define JVM_H #define JVM_CLASS_MAGIC 0xcafebabe #define CLASS_READ_U4(s, p) \ do { \ s = (((p >> 24) & 0x000000ff) | \ ((p >> 8) & 0x0000ff00) | \ ((p << 24) & 0xff000000) | \ ((p << 8) & 0x00ff0000)); \ } while (0); #define CLASS_READ_U2(s, p) \ do { \ s = (((p >> 8) & 0x00ff) | \ ((p << 8) & 0xff00)); \ } while (0); #define CLASS_READ_U1(s, p) \ do { \ s = p; \ } while (0); #define CLASS_READ_STRING(s, p, len) \ do { \ memcpy(s, p, len); \ } while (0); typedef unsigned int u4; typedef unsigned short u2; typedef unsigned char u1; #define CONSTANT_Class 7 #define CONSTANT_Fieldref 9 #define CONSTANT_Methodref 10 #define CONSTANT_InterfaceMethodref 11 #define CONSTANT_String 8 #define CONSTANT_Integer 3 #define CONSTANT_Float 4 #define CONSTANT_Long 5 #define CONSTANT_Double 6 #define CONSTANT_NameAndType 12 #define CONSTANT_Utf8 1 #define CONSTANT_MethodHandle 15 #define CONSTANT_MethodType 16 #define CONSTANT_InvokeDynamic 18 #define ACC_PUBLIC 0x0001 #define ACC_FINAL 0x0010 #define ACC_SUPER 0x0020 #define ACC_INTERFACE 0x0200 #define ACC_ABSTRACT 0X0400 #define ACC_SYNTHETIC 0x1000 #define ACC_ANNOTATION 0x2000 #define ACC_ENUM 0x4000 #define METHOD_ACC_PUBLIC 0x0001 #define METHOD_ACC_PRIVATE 0x0002 #define METHOD_ACC_PROTECTED 0x0004 #define METHOD_ACC_STATIC 0x0008 #define METHOD_ACC_FINAL 0x0010 #define METHOD_ACC_SYNCHRONIED 0x0020 #define METHOD_ACC_BRIDGE 0x0040 #define METHOD_ACC_VARARGS 0x0080 #define METHOD_ACC_NATIVE 0x0100 #define METHOD_ACC_ABSTRACT 0x0400 #define METHOD_ACC_STRICT 0x0800 #define METHOD_ACC_SYNTHETIC 0x1000 #define ITEM_Top 0 #define ITEM_Integer 1 #define ITEM_Float 2 #define ITEM_Double 3 #define ITEM_Long 4 #define ITEM_Null 5 #define ITEM_UninitializedThis 6 #define ITEM_Object 7 #define ITEM_Uninitialized 8 struct constant_info_st { u2 index; u1 *base; }__attribute__ ((packed)); struct cp_info { u1 tag; u1 info[]; }__attribute__ ((packed)); struct CONSTANT_Class_info { //u1 tag; u2 name_index; 
}__attribute__ ((packed)); struct CONSTANT_Fieldref_info { //u1 tag; u2 class_index; u2 name_and_type_index; }__attribute__ ((packed)); struct CONSTANT_Methodref_info { //u1 tag; u2 class_index; u2 name_and_type_index; }__attribute__ ((packed)); struct CONSTANT_InterfaceMethodref_info { //u1 tag; u2 class_index; u2 name_and_type_inex; }__attribute__ ((packed)); struct CONSTANT_String_info { //u1 tag; u2 string_index; }__attribute__ ((packed)); struct CONSTANT_Integer_info { //u1 tag; u4 bytes; }__attribute__ ((packed)); struct CONSTANT_Float_info { //u1 tag; u4 bytes; }__attribute__ ((packed)); struct CONSTANT_Long_info { //u1 tag; u4 high_bytes; u4 low_bytes; }__attribute__ ((packed)); struct CONSTANT_Double_info { //u1 tag; u4 high_bytes; u4 low_bytes; }__attribute__ ((packed)); struct CONSTANT_NameAndType_info { //u1 tag; u2 name_index; u2 descriptor_index; }__attribute__ ((packed)); struct CONSTANT_Utf8_info { //u1 tag; u2 length; u1 bytes[]; }__attribute__ ((packed)); struct CONSTANT_MethodHandle_info { //u1 tag; u1 reference_kind; u2 reference_index; }__attribute__ ((packed)); struct CONSTANT_MethodType_info { //u1 tag; u2 descriptor_index; }__attribute__ ((packed)); struct CONSTANT_InvokeDynamic_info { //u1 tag; u2 bootstrap_method_attr_index; u2 name_and_type_index; }__attribute__ ((packed)); #endif //////////////////////////////////////////////////////////////////////// classreader.c: /* * classreader.c - jvm class file parser. 
* * (c) wzt 2012 http://www.cloud-sec.org * */ #include #include #include #include #include #include #include #include #include #include "jvm.h" static int class_fd; static int class_file_len; static void *class_start_mem; static char *p_mem; static struct constant_info_st *constant_info; int mmap_class_file(const char *class_file) { struct stat f_stat; class_fd = open(class_file, O_RDONLY); if (class_fd == -1) { perror("open"); return -1; } if (stat(class_file, &f_stat) == -1) { perror("stat"); close(class_fd); return -1; } class_file_len = f_stat.st_size; printf("%s file len: %d\n", class_file, class_file_len); class_start_mem = mmap(NULL, class_file_len, PROT_READ, MAP_PRIVATE, class_fd, 0); if (!class_start_mem) { perror("mmap"); close(class_fd); return -1; } printf("mmap %s at %p\n", class_file, class_start_mem); return 0; } int mmap_exit(void) { if (munmap(class_start_mem, class_file_len) == -1) { perror("munmap"); return -1; } close(class_fd); return 0; } int parse_class_magic(void) { u4 class_magic; /* read class magic number. */ CLASS_READ_U4(class_magic, (*(u4 *)p_mem)) p_mem = 4; printf("magic: 0x%x\n", class_magic); if (class_magic != JVM_CLASS_MAGIC) { printf("jvm class magic not match.\n"); return -1; } printf("jvm class magic match: 0x%x\n", class_magic); return 0; } int parse_class_version(void) { u2 minor_version, major_version; u2 constant_pool_count; /* read class minor_version. */ CLASS_READ_U2(minor_version, (*(u2 *)p_mem)) p_mem = 2; printf("jvm class minor_version: %d\n", minor_version); /* read class major_version. 
*/ CLASS_READ_U2(major_version, (*(u2 *)p_mem)) p_mem = 2; printf("jvm class major_version: %d\n", major_version); return 0; } int parse_class_constant(void) { u2 constant_pool_count; u1 constant_tag; u2 idx; printf("\n-----------parse contant pool count----------------------:\n\n"); /* read constant_pool_count */ CLASS_READ_U2(constant_pool_count, (*(u2 *)p_mem)) p_mem = 2; printf("jvm constant_pool_count: %d\n", constant_pool_count); constant_info = (struct constant_info_st *) malloc(sizeof(struct constant_info_st) * constant_pool_count); if (!constant_info) { printf("Malloc failed.\n"); return -1; } for (idx = 1; idx <= constant_pool_count - 1; idx ) { CLASS_READ_U1(constant_tag, (*(u1 *)p_mem)) p_mem = 1; printf("- idx: - constant tag: %d\t", idx, (int)constant_tag); switch (constant_tag) { case CONSTANT_Fieldref: case CONSTANT_Methodref: case CONSTANT_InterfaceMethodref: { struct CONSTANT_Methodref_info methodref_info; CLASS_READ_U2(methodref_info.class_index, (*(u2 *)p_mem)); p_mem = 2; assert(methodref_info.class_index > 0 && methodref_info.class_index < constant_pool_count); CLASS_READ_U2(methodref_info.name_and_type_index, (*(u2 *)p_mem)); p_mem = 2; assert(methodref_info.class_index > 0 && methodref_info.class_index < constant_pool_count); printf("class_index: %d, name_and_type_index: %d\n", methodref_info.class_index, methodref_info.name_and_type_index); break; } case CONSTANT_Class: { struct CONSTANT_Class_info class_info; CLASS_READ_U2(class_info.name_index, (*(u2 *)p_mem)); p_mem = 2; assert(class_info.name_index > 0 && class_info.name_index < constant_pool_count); printf("name_index: %d\n", class_info.name_index); break; } case CONSTANT_String: { struct CONSTANT_String_info string_info; CLASS_READ_U2(string_info.string_index, (*(u2 *)p_mem)); p_mem = 2; assert(string_info.string_index > 0 && string_info.string_index < constant_pool_count); printf("string index: %d\n", string_info.string_index); break; } case CONSTANT_Long: { struct CONSTANT_Long_info 
long_info; CLASS_READ_U2(long_info.high_bytes, (*(u2 *)p_mem)); p_mem = 2; CLASS_READ_U2(long_info.low_bytes, (*(u2 *)p_mem)); p_mem = 2; printf("high bytes: %d, low bytes: %d\n", long_info.high_bytes, long_info.low_bytes); break; } case CONSTANT_Integer: { struct CONSTANT_Integer_info integer_info; CLASS_READ_U4(integer_info.bytes, (*(u4 *)p_mem)); p_mem = 4; printf("bytes: %d\n", integer_info.bytes); break; } case CONSTANT_Float: { struct CONSTANT_Float_info float_info; CLASS_READ_U4(float_info.bytes, (*(u4 *)p_mem)); p_mem = 4; printf("bytes: %d\n", float_info.bytes); break; } case CONSTANT_Double: { struct CONSTANT_Double_info double_info; CLASS_READ_U4(double_info.high_bytes, (*(u4 *)p_mem)); p_mem = 4; CLASS_READ_U4(double_info.low_bytes, (*(u4 *)p_mem)); p_mem = 4; printf("high_bytes: %d, low_bytes: %d\n", double_info.high_bytes, double_info.low_bytes); break; } case CONSTANT_NameAndType: { struct CONSTANT_NameAndType_info name_type_info; CLASS_READ_U2(name_type_info.name_index, (*(u2 *)p_mem)); p_mem = 2; CLASS_READ_U2(name_type_info.descriptor_index, (*(u2 *)p_mem)); p_mem = 2; printf("name_index: %d, descriptor_index: %d\n", name_type_info.name_index, name_type_info.descriptor_index); break; } case CONSTANT_MethodHandle: { struct CONSTANT_MethodHandle_info method_handle_info; CLASS_READ_U1(method_handle_info.reference_kind, (*(u1 *)p_mem)); p_mem = 1; CLASS_READ_U2(method_handle_info.reference_index, (*(u2 *)p_mem)); p_mem = 2; printf("reference_kind: %d, reference_index: %d\n", method_handle_info.reference_kind, method_handle_info.reference_index); break; } case CONSTANT_MethodType: { struct CONSTANT_MethodType_info method_type_info; CLASS_READ_U2(method_type_info.descriptor_index, (*(u2 *)p_mem)); p_mem = 2; printf("descriptor_index %d\n", method_type_info.descriptor_index); break; } case CONSTANT_InvokeDynamic: { struct CONSTANT_InvokeDynamic_info invoke_dyc_info; CLASS_READ_U2(invoke_dyc_info.bootstrap_method_attr_index, (*(u2 *)p_mem)); p_mem = 2; 
CLASS_READ_U2(invoke_dyc_info.name_and_type_index, (*(u2 *)p_mem)); p_mem = 2; printf("bootstrap_method_attr_index: %d, name_and_type_index: %d\n", invoke_dyc_info.bootstrap_method_attr_index, invoke_dyc_info.name_and_type_index); break; } case CONSTANT_Utf8: { u2 len; char *buf; CLASS_READ_U2(len, (*(u2 *)p_mem)); p_mem = 2; buf = malloc(len 1); buf[len] = '\0'; assert(buf != NULL); memcpy(buf, p_mem, len); printf("len: %d\t%s\n", len, buf); p_mem = len; constant_info[idx].index = idx; constant_info[idx].base = buf; break; } default: ; } } printf("\n"); /* for (idx = 1; idx <= constant_pool_count - 1; idx ) printf("%d: %s\n", constant_info[idx].index, constant_info[idx].base); */ return 0; out: mmap_exit(); return -1; } int parse_class_access_flag(void) { u2 access_flag; /* read class access flag. */ CLASS_READ_U2(access_flag, (*(u2 *)p_mem)) p_mem = 2; printf("access_flag: 0x%x\n", access_flag); return 0; } int parse_class_this_super(void) { u2 this_class; u2 super_class; CLASS_READ_U2(this_class, (*(u2 *)p_mem)) p_mem = 2; CLASS_READ_U2(super_class, (*(u2 *)p_mem)) p_mem = 2; printf("this_class: %d\tsuper_class: %d\n\n", this_class, super_class); return 0; } int parse_class_interface(void) { u2 interfaces_count; u2 idx, index; CLASS_READ_U2(interfaces_count, (*(u2 *)p_mem)) p_mem = 2; printf("interfaces_count: %d\n", interfaces_count); for (idx = 0; idx < interfaces_count; idx ) { CLASS_READ_U2(index, (*(u2 *)p_mem)); p_mem = 2; printf("index: %d\n", index); } return 0; } int parse_class_filed(void) { u2 fileds_count; u2 idx; CLASS_READ_U2(fileds_count, (*(u2 *)p_mem)) p_mem = 2; printf("filed_count: %d\n", fileds_count); return 0; } int __parse_exception_table(int len) { u2 start_pc, end_pc; u2 handler_pc, catch_type; u2 idx; for (idx = 0; idx < len; idx ) { CLASS_READ_U2(start_pc, (*(u2 *)p_mem)) p_mem = 2; printf("start_pc: %d\n", start_pc); CLASS_READ_U2(end_pc, (*(u2 *)p_mem)) p_mem = 2; printf("end_pc: %d\n", end_pc); CLASS_READ_U2(handler_pc, (*(u2 
*)p_mem)) p_mem = 2; printf("handler_pc: %d\n", handler_pc); CLASS_READ_U2(catch_type, (*(u2 *)p_mem)) p_mem = 2; printf("catch_type: %d\n", catch_type); } return 0; } int __parse_line_number_table(void) { u4 attribute_length; u2 line_number_table_length; u2 start_pc, line_number; u2 idx; CLASS_READ_U4(attribute_length, (*(u4 *)p_mem)) p_mem = 4; printf("\t\tattribute_length: %d\n", attribute_length); CLASS_READ_U2(line_number_table_length, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tline_number_table_length: %d\n", line_number_table_length); for (idx = 0; idx < line_number_table_length; idx ) { CLASS_READ_U2(start_pc, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tstart_pc: %d\n", start_pc); CLASS_READ_U2(line_number, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tline_number: %d\n", line_number); } return 0; } int __parse_verification_type_info(u1 number) { u1 idx, tag; for (idx = 0; idx < number; idx ) { CLASS_READ_U1(tag, (*(u1 *)p_mem)) p_mem = 1; printf("\t\ttag: %d\n", tag); switch (tag) { case ITEM_Top: printf("\t\tITEM_Top.\n"); break; case ITEM_Integer: printf("\t\tITEM_Integer.\n"); break; case ITEM_Float: printf("\t\tITEM_float.\n"); break; case ITEM_Double: printf("\t\tITEM_Double.\n"); break; case ITEM_Long: printf("\t\tITEM_Long.\n"); break; case ITEM_Null: printf("\t\tITEM_Long.\n"); break; case ITEM_UninitializedThis: printf("\t\tITEM_UninitializedThis.\n"); break; case ITEM_Object: { u2 cpool_index; printf("\t\tITEM_Object.\n"); CLASS_READ_U2(cpool_index, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tcpool_index: %d\n", cpool_index); break; } case ITEM_Uninitialized: { u2 offset; printf("\t\tITEM_Uninitialized.\n"); CLASS_READ_U2(offset, (*(u2 *)p_mem)) p_mem = 2; printf("\t\toffset: %d\n", offset); break; } default: return -1; } } return 0; } int __parse_stack_map_frame(u2 number) { u1 frame_type; u1 offset_delta; u2 idx; u1 stack_num; u1 locals_num; u1 local_idx; for (idx = 0; idx < number; idx ) { CLASS_READ_U1(frame_type, (*(u1 *)p_mem)) p_mem = 1; 
printf("\t\tframe_type: %d\n", frame_type); if (frame_type >= 0 && frame_type <= 63) { offset_delta = frame_type; printf("\t\tsame_frame\toffset_delta: %d\n", offset_delta); } if (frame_type >= 64 && frame_type <= 127) { offset_delta = frame_type - 64; stack_num = 1; printf("\t\tsame_locals_l_stack_item_frame\toffset_delta: %d\n", offset_delta); __parse_verification_type_info(stack_num); } if (frame_type == 247) { stack_num = 1; CLASS_READ_U2(offset_delta, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tsame_locals_l_stack_item_frame_extendedn\toffset_delta: %d\n", offset_delta); __parse_verification_type_info(stack_num); } if (frame_type >= 248 && frame_type <= 250) { CLASS_READ_U2(offset_delta, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tsame_locals_l_stack_item_frame_extended\toffset_delta: %d\n", offset_delta); } if (frame_type == 251) { CLASS_READ_U2(offset_delta, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tsame_frame_extended\toffset_delta: %d\n", offset_delta); } if (frame_type >= 252 && frame_type <= 254) { CLASS_READ_U2(offset_delta, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tappend_frame\toffset_delta: %d\n", offset_delta); locals_num = frame_type - 251; printf("\t\tlocals_num: %d\n", locals_num); __parse_verification_type_info(locals_num); } } } int __parse_stack_map_table(void) { u4 attribute_length; u2 number_of_entries; u2 idx; CLASS_READ_U4(attribute_length, (*(u4 *)p_mem)) p_mem = 4; printf("\t\tattribute_length: %d\n", attribute_length); CLASS_READ_U2(number_of_entries, (*(u2 *)p_mem)) p_mem = 2; printf("\t\tnumber_of_entries: %d\n", number_of_entries); __parse_stack_map_frame(number_of_entries); return 0; } /* attribute_name_index has been parsed before. 
*/ int parse_code_attribute(void) { u2 attribute_name_index; u4 attribute_length; u2 max_stack; u2 max_locals; u4 code_length; u1 *code; u2 exception_table_length; u2 attributes_count; u2 idx; CLASS_READ_U4(attribute_length, (*(u4 *)p_mem)) p_mem = 4; printf("\tattribute_length: %d\n", attribute_length); CLASS_READ_U2(max_stack, (*(u2 *)p_mem)) p_mem = 2; printf("\tmax_stack: %d\n", max_stack); CLASS_READ_U2(max_locals, (*(u2 *)p_mem)) p_mem = 2; printf("\tmax_locals: %d\n", max_locals); CLASS_READ_U4(code_length, (*(u4 *)p_mem)) p_mem = 4; printf("\tcode_length: %d\n", code_length); code = (u1 *)malloc(code_length 1); if (!code) { printf("Malloc failed.\n"); return -1; } memcpy(code, p_mem, code_length); code[code_length] = '\0'; p_mem = code_length; CLASS_READ_U2(exception_table_length, (*(u2 *)p_mem)) p_mem = 2; printf("\texception_table_length: %d\n", exception_table_length); __parse_exception_table(exception_table_length); CLASS_READ_U2(attributes_count, (*(u2 *)p_mem)) p_mem = 2; printf("\tattributes_count: %d\n", attributes_count); /* parse attributes */ for (idx = 0; idx < attributes_count; idx ) { CLASS_READ_U2(attribute_name_index, (*(u2 *)p_mem)) p_mem = 2; printf("\tidx: %d attribute_name_index: %d", idx 1, attribute_name_index); if (!strcmp(constant_info[attribute_name_index].base, "LineNumberTable")) { printf("\n\tparse LineNumberTable:\n"); __parse_line_number_table(); } if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) { printf("\n\tparse StackMapTable:\n"); __parse_stack_map_table(); } if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTable")) { ; } if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTypeTable")) { ; } if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) { ; } } return 0; } int parse_class_method(void) { u2 method_count; u2 access_flags, name_index; u2 descriptor_index, attributes_count; u2 idx; printf("\n---------------parse class 
method-------------------------:\n\n"); CLASS_READ_U2(method_count, (*(u2 *)p_mem)) p_mem = 2; printf("method_count: %d\n", method_count); for (idx = 0; idx < method_count; idx ) { CLASS_READ_U2(access_flags, (*(u2 *)p_mem)) p_mem = 2; printf("access_flags: 0x%x\n", access_flags); CLASS_READ_U2(name_index, (*(u2 *)p_mem)) p_mem = 2; printf("name_index: %d\n", name_index); CLASS_READ_U2(descriptor_index, (*(u2 *)p_mem)) p_mem = 2; printf("descriptor_index: %d\n", descriptor_index); CLASS_READ_U2(attributes_count, (*(u2 *)p_mem)) p_mem = 2; printf("attributes_count: %d\n\n", attributes_count); /* parse attributes */ CLASS_READ_U2(name_index, (*(u2 *)p_mem)) p_mem = 2; printf("attritbutes name_index: %d\n", name_index); if (!strcmp(constant_info[name_index].base, "Code")) { printf("parse code attribute:\n"); parse_code_attribute(); } if (!strcmp(constant_info[name_index].base, "Exceptions")) { ; } if (!strcmp(constant_info[name_index].base, "Signature")) { ; } } return 0; } int jvm_parse_class_file(const char *class_file) { assert(class_file != NULL); if (mmap_class_file(class_file) == -1) return -1; p_mem = class_start_mem; if (parse_class_magic() == -1) goto out; if (parse_class_version() == -1) goto out; if (parse_class_constant() == -1) goto out; if (parse_class_access_flag() == -1) goto out; if (parse_class_this_super() == -1) goto out; if (parse_class_interface() == -1) goto out; if (parse_class_filed() == -1) goto out; if (parse_class_method() == -1) goto out; mmap_exit(); return 0; out: mmap_exit(); return -1; } void jvm_usage(const char *proc) { fprintf(stdout, "usage: %s \n", proc); } int main(int argc, char **argv) { if (argc == 1) { jvm_usage(argv[0]); return 0; } jvm_parse_class_file(argv[1]); return 0; }.h>.h>.h>.h>.h>.h>
////////////////////////////////////////////////////////////////////////////
[email protected] # gcc -o classreader classreader.c -w [email protected] # ./classreader test.class test.class file len: 462 mmap test.class at 0x2b0b78fa5000 magic: 0xcafebabe jvm class magic match: 0xcafebabe jvm class minor_version: 0 jvm class major_version: 50 -----------parse contant pool count----------------------: jvm constant_pool_count: 30 - idx: 1 constant tag: 10 class_index: 6, name_and_type_index: 16 - idx: 2 constant tag: 9 class_index: 17, name_and_type_index: 18 - idx: 3 constant tag: 8 string index: 19 - idx: 4 constant tag: 10 class_index: 20, name_and_type_index: 21 - idx: 5 constant tag: 7 name_index: 22 - idx: 6 constant tag: 7 name_index: 23 - idx: 7 constant tag: 1 len: 6 - idx: 8 constant tag: 1 len: 3 ()V - idx: 9 constant tag: 1 len: 4 Code - idx: 10 constant tag: 1 len: 15 LineNumberTable - idx: 11 constant tag: 1 len: 4 main - idx: 12 constant tag: 1 len: 22 ([Ljava/lang/String;)V - idx: 13 constant tag: 1 len: 13 StackMapTable - idx: 14 constant tag: 1 len: 10 SourceFile - idx: 15 constant tag: 1 len: 9 test.java - idx: 16 constant tag: 12 name_index: 7, descriptor_index: 8 - idx: 17 constant tag: 7 name_index: 24 - idx: 18 constant tag: 12 name_index: 25, descriptor_index: 26 - idx: 19 constant tag: 1 len: 4 hehe - idx: 20 constant tag: 7 name_index: 27 - idx: 21 constant tag: 12 name_index: 28, descriptor_index: 29 - idx: 22 constant tag: 1 len: 4 test - idx: 23 constant tag: 1 len: 16 java/lang/Object - idx: 24 constant tag: 1 len: 16 java/lang/System - idx: 25 constant tag: 1 len: 3 out - idx: 26 constant tag: 1 len: 21 Ljava/io/PrintStream; - idx: 27 constant tag: 1 len: 19 java/io/PrintStream - idx: 28 constant tag: 1 len: 7 println - idx: 29 constant tag: 1 len: 21 (Ljava/lang/String;)V access_flag: 0x21 this_class: 5 super_class: 6 interfaces_count: 0 filed_count: 0 ---------------parse class method-------------------------: method_count: 2 access_flags: 0x1 name_index: 7 descriptor_index: 8 attributes_count: 1 attritbutes 
name_index: 9 parse code attribute: attribute_length: 29 max_stack: 1 max_locals: 1 code_length: 5 exception_table_length: 0 attributes_count: 1 idx: 1 attribute_name_index: 10 parse LineNumberTable: attribute_length: 6 line_number_table_length: 1 start_pc: 0 line_number: 5 access_flags: 0x9 name_index: 11 descriptor_index: 12 attributes_count: 1 attritbutes name_index: 9 parse code attribute: attribute_length: 77 max_stack: 2 max_locals: 2 code_length: 24 exception_table_length: 0 attributes_count: 2 idx: 1 attribute_name_index: 10 parse LineNumberTable: attribute_length: 22 line_number_table_length: 5 start_pc: 0 line_number: 7 start_pc: 2 line_number: 9 start_pc: 9 line_number: 10 start_pc: 17 line_number: 9 start_pc: 23 line_number: 11 idx: 2 attribute_name_index: 13 parse StackMapTable: attribute_length: 7 number_of_entries: 2 frame_type: 252 append_frame offset_delta: 4 locals_num: 1 tag: 1 ITEM_Integer. frame_type: 18 same_frame offset_delta: 18 [email protected] #
这两天在class文件解析器的基础上, 加上了java反汇编的功能, 反汇编器是指令解释器的基础,通过编写反汇编器可以熟悉jvm的指令系统, 不过jvm的指令一共有201个,反汇编过程基本就是个体力活。在《java虚拟机规范》中对每一条指令都有了详细的描述,下面说说我是如何解析bytecode的:
一个java文件经过javac编译后会生成class格式文件, 在class格式中method字段里会有Code属性,Code属性包含了java的指令码和长度。 首先用class解析器将指令码提取出来, 举个例子:
test.java
class aa { int a = 6; }; public class test { public static void main(String args[]) { int i = 0; for (i = 0; i < 5; i++) System.out.println("hehe"); } }
我们用class文件解析器把test对应的bytecode打印出来:
len: 5
0x2a0xb70x00x10xb1
这一串bytecode为:0x2a0xb70x00x10xb1, 长度是5个字节。
对照《java虚拟机规范》我们来一步步手工解析:
0x2a代表aload_0指令, 它将本地局部变量中的第一个变量压入到堆栈里。这个指令本身长度就是一个字节,没有参数, 因此0x2a的解析就非常简单, 直接在屏幕打印出aload_0即可:
printf(“%s\n”, symbol);
0xb7代表invokespecial 它用来调用超类构造方法,实例初始化方法, 私有方法。它的用法如下:
invokespecial indexbyte1 indexbyte2,indexbyte1和indexbyte2各占一个字节,用(indexbyte1 << 8) | indexbyte2来构建一个常量池中的索引。每个jvm指令本身都占用一个字节,加上它的两个参数, invokespecial语句它将占用3个字节空间。 所以它的解析算法如下:
u2 index; index = ((*(u1 *)(base + 1)) << 8) | (*(u1 *)(base + 2)); printf("%s #%x\n", symbol, index);
注意0xb7解析完后,我们要跳过3个字节的地址,那么就是0xb1了, 它是return指令,没有参数,因此它的解析方法跟aload_0一样:
printf(“%s\n”, symbol);
以上是我们手工解析的过程, 但是jvm有201条指令, 我们需要建立一个合适的数据结构:
typedef int (*interp_func)(u2 opcode_len, char *symbol, void *base); typedef struct bytecode_st { u2 opcode; // jvm的指令码 u2 opcode_len; // 指令总的长度,包括参数 char symbol[OPCODE_SYMBOL_LEN]; // 指令对应的助记符 interp_func func; // 解析指令的回调函数 }BYTECODE;
我们可以直接建立一个大的BYTECODE数组:
BYTECODE jvm_byte_code[OPCODE_LEN] = { {0x00, 1, "nop", jvm_interp_nop}, {0x01, 1, "aconst_null", jvm_interp_aconst_null}, {0x02, 1, "iconst_m1", jvm_interp_iconst_m1}, {0x03, 1, "iconst_0", jvm_interp_iconst_0}, {0x04, 1, "iconst_1", jvm_interp_iconst_1}, {0x05, 1, "iconst_2", jvm_interp_iconst_2}, {0x06, 1, "iconst_3", jvm_interp_iconst_3}, {0x07, 1, "iconst_4", jvm_interp_iconst_4}, {0x08, 1, "iconst_5", jvm_interp_iconst_5}, {0x09, 1, "lconst_0", jvm_interp_lconst_0}, {0x0a, 1, "lconst_1", jvm_interp_lconst_1}, {0x0b, 1, "fconst_0", jvm_interp_fconst_0}, {0x0c, 1, "fconst_1", jvm_interp_fconst_1}, {0x0d, 1, "fconst_2", jvm_interp_fconst_2}, {0x0e, 1, "dconst_0", jvm_interp_dconst_0}, {0x0f, 1, "dconst_1", jvm_interp_dconst_1}, {0x10, 1, "bipush", jvm_interp_bipush}, {0x11, 1, "sipush", jvm_interp_sipush}, {0x12, 2, "ldc", jvm_interp_ldc}, {0x13, 1, "ldc_w", jvm_interp_ldc_w}, {0x14, 1, "ldc2_w", jvm_interp_ldc2_w}, {0x15, 1, "iload", jvm_interp_iload}, {0x16, 1, "lload", jvm_interp_lload}, {0x17, 1, "fload", jvm_interp_fload}, {0x18, 1, "dload", jvm_interp_dload}, {0x19, 1, "aload", jvm_interp_aload}, {0x1a, 1, "iload_0", jvm_interp_iload_0}, {0x1b, 1, "iload_1", jvm_interp_iload_1}, {0x1c, 1, "iload_2", jvm_interp_iload_2}, {0x1d, 1, "iload_3", jvm_interp_iload_3}, {0x1e, 1, "lload_0", jvm_interp_lload_0}, {0x1f, 1, "lload_1", jvm_interp_lload_1}, {0x20, 1, "lload_2", jvm_interp_lload_2}, {0x21, 1, "lload_3", jvm_interp_lload_3}, {0x22, 1, "fload_0", jvm_interp_fload_0}, {0x23, 1, "fload_1", jvm_interp_fload_1}, {0x24, 1, "fload_2", jvm_interp_fload_2}, {0x25, 1, "fload_3", jvm_interp_fload_3}, {0x26, 1, "dload_0", jvm_interp_dload_0}, {0x27, 1, "dload_1", jvm_interp_dload_1}, {0x28, 1, "dload_2", jvm_interp_dload_2}, {0x29, 1, "dload_3", jvm_interp_dload_3}, {0x2a, 1, "aload_0", jvm_interp_aload_0}, {0x2b, 1, "aload_1", jvm_interp_aload_1}, {0x2c, 1, "aload_2", jvm_interp_aload_2}, {0x2d, 1, "aload_3", jvm_interp_aload_3}, {0x2e, 1, "iaload", 
jvm_interp_iaload}, {0x2f, 1, "laload", jvm_interp_laload}, {0x30, 1, "faload", jvm_interp_faload}, {0x31, 1, "daload", jvm_interp_daload}, {0x32, 1, "aaload", jvm_interp_aaload}, {0x33, 1, "baload", jvm_interp_baload}, {0x34, 1, "caload", jvm_interp_caload}, {0x35, 1, "saload", jvm_interp_saload}, {0x36, 1, "istore", jvm_interp_istore}, {0x37, 1, "lstore", jvm_interp_lstore}, {0x38, 1, "fstore", jvm_interp_fstore}, {0x39, 1, "dstore", jvm_interp_dstore}, {0x3a, 1, "astore", jvm_interp_astore}, {0x3b, 1, "istore_0", jvm_interp_istore_0}, {0x3c, 1, "istore_1", jvm_interp_istore_1}, {0x3d, 1, "istore_2", jvm_interp_istore_2}, {0x3e, 1, "istore_3", jvm_interp_istore_3}, {0x3f, 1, "lstore_0", jvm_interp_lstore_0}, {0x40, 1, "lstore_1", jvm_interp_lstore_1}, {0x41, 1, "lstore_2", jvm_interp_lstore_2}, {0x42, 1, "lstore_3", jvm_interp_lstore_3}, {0x43, 1, "fstore_0", jvm_interp_fstore_0}, {0x44, 1, "fstore_1", jvm_interp_fstore_1}, {0x45, 1, "fstore_2", jvm_interp_fstore_2}, {0x46, 1, "fstore_3", jvm_interp_fstore_3}, {0x47, 1, "dstore_0", jvm_interp_dstore_0}, {0x48, 1, "dstore_1", jvm_interp_dstore_1}, {0x49, 1, "dstore_2", jvm_interp_dstore_2}, {0x4a, 1, "dstore_3", jvm_interp_dstore_3}, {0x4b, 1, "astore_0", jvm_interp_astore_0}, {0x4c, 1, "astore_1", jvm_interp_astore_1}, {0x4d, 1, "astore_2", jvm_interp_astore_2}, {0x4e, 1, "astore_3", jvm_interp_astore_3}, {0x4f, 1, "iastore", jvm_interp_iastore}, {0x50, 1, "lastore", jvm_interp_lastore}, {0x51, 1, "fastore", jvm_interp_fastore}, {0x52, 1, "dastore", jvm_interp_dastore}, {0x53, 1, "aastore", jvm_interp_aastore}, {0x54, 1, "bastore", jvm_interp_bastore}, {0x55, 1, "castore", jvm_interp_castore}, {0x56, 1, "sastore", jvm_interp_sastore}, {0x57, 1, "pop", jvm_interp_pop}, {0x58, 1, "pop2", jvm_interp_pop2}, {0x59, 1, "dup", jvm_interp_dup}, {0x5a, 1, "dup_x1", jvm_interp_dup_x1}, {0x5b, 1, "dup_x2", jvm_interp_dup_x2}, {0x5c, 1, "dup2", jvm_interp_dup2}, {0x5d, 1, "dup2_x1", jvm_interp_dup2_x1}, {0x5e, 1, "dup2_x2", 
jvm_interp_dup2_x2}, {0x5f, 1, "swap", jvm_interp_swap}, {0x60, 1, "iadd", jvm_interp_iadd}, {0x61, 1, "ladd", jvm_interp_ladd}, {0x62, 1, "fadd", jvm_interp_fadd}, {0x63, 1, "dadd", jvm_interp_dadd}, {0x64, 1, "isub", jvm_interp_isub}, {0x65, 1, "lsub", jvm_interp_lsub}, {0x66, 1, "fsub", jvm_interp_fsub}, {0x67, 1, "dsub", jvm_interp_dsub}, {0x68, 1, "imul", jvm_interp_imul}, {0x69, 1, "lmul", jvm_interp_lmul}, {0x6a, 1, "fmul", jvm_interp_fmul}, {0x6b, 1, "dmul", jvm_interp_dmul}, {0x6c, 1, "idiv", jvm_interp_idiv}, {0x6d, 1, "ldiv", jvm_interp_ldiv}, {0x6e, 1, "fdiv", jvm_interp_fdiv}, {0x6f, 1, "ddiv", jvm_interp_ddiv}, {0x70, 1, "irem", jvm_interp_irem}, {0x71, 1, "lrem", jvm_interp_lrem}, {0x72, 1, "frem", jvm_interp_frem}, {0x73, 1, "drem", jvm_interp_drem}, {0x74, 1, "ineg", jvm_interp_ineg}, {0x75, 1, "lneg", jvm_interp_lneg}, {0x76, 1, "fneg", jvm_interp_fneg}, {0x77, 1, "dneg", jvm_interp_dneg}, {0x78, 1, "ishl", jvm_interp_ishl}, {0x79, 1, "lshl", jvm_interp_lshl}, {0x7a, 1, "ishr", jvm_interp_ishr}, {0x7b, 1, "lshr", jvm_interp_lshr}, {0x7c, 1, "iushr", jvm_interp_iushr}, {0x7d, 1, "lushr", jvm_interp_lushr}, {0x7e, 1, "iand", jvm_interp_iand}, {0x7f, 1, "land", jvm_interp_land}, {0x80, 1, "ior", jvm_interp_ior}, {0x81, 1, "lor", jvm_interp_lor}, {0x82, 1, "ixor", jvm_interp_ixor}, {0x83, 1, "lxor", jvm_interp_lxor}, {0x84, 3, "iinc", jvm_interp_iinc}, {0x85, 1, "i2l", jvm_interp_i2l}, {0x86, 1, "i2f", jvm_interp_i2f}, {0x87, 1, "i2d", jvm_interp_i2d}, {0x88, 1, "l2i", jvm_interp_l2i}, {0x89, 1, "l2f", jvm_interp_l2f}, {0x8a, 1, "l2d", jvm_interp_l2d}, {0x8b, 1, "f2i", jvm_interp_f2i}, {0x8c, 1, "f2l", jvm_interp_f2l}, {0x8d, 1, "f2d", jvm_interp_f2d}, {0x8e, 1, "d2i", jvm_interp_d2i}, {0x8f, 1, "d2l", jvm_interp_d2l}, {0x90, 1, "d2f", jvm_interp_d2f}, {0x91, 1, "i2b", jvm_interp_i2b}, {0x92, 1, "i2c", jvm_interp_i2c}, {0x93, 1, "i2s", jvm_interp_i2s}, {0x94, 1, "lcmp", jvm_interp_lcmp}, {0x95, 1, "fcmpl", jvm_interp_fcmpl}, {0x96, 1, "fcmpg", 
jvm_interp_fcmpg}, {0x97, 1, "dcmpl", jvm_interp_dcmpl}, {0x98, 1, "dcmpg", jvm_interp_dcmpg}, {0x99, 1, "ifeq", jvm_interp_ifeq}, {0x9a, 1, "ifne", jvm_interp_ifne}, {0x9b, 1, "iflt", jvm_interp_iflt}, {0x9c, 1, "ifge", jvm_interp_ifge}, {0x9d, 1, "ifgt", jvm_interp_ifgt}, {0x9e, 1, "ifle", jvm_interp_ifle}, {0x9f, 1, "if_icmpeq", jvm_interp_if_icmpeq}, {0xa0, 1, "if_icmpne", jvm_interp_if_icmpne}, {0xa1, 1, "if_icmplt", jvm_interp_if_icmplt}, {0xa2, 3, "if_icmpge", jvm_interp_if_icmpge}, {0xa3, 1, "if_icmpgt", jvm_interp_if_icmpgt}, {0xa4, 1, "if_icmple", jvm_interp_if_icmple}, {0xa5, 1, "if_acmpeq", jvm_interp_if_acmpeq}, {0xa6, 1, "if_acmpne", jvm_interp_if_acmpne}, {0xa7, 3, "goto", jvm_interp_goto}, {0xa8, 1, "jsr", jvm_interp_jsr}, {0xa9, 1, "ret", jvm_interp_ret}, {0xaa, 1, "tableswitch", jvm_interp_tableswitch}, {0xab, 1, "lookupswitch", jvm_interp_lookupswitch}, {0xac, 1, "ireturn", jvm_interp_ireturn}, {0xad, 1, "lreturn", jvm_interp_lreturn}, {0xae, 1, "freturn", jvm_interp_freturn}, {0xaf, 1, "dreturn", jvm_interp_dreturn}, {0xb0, 1, "areturn", jvm_interp_areturn}, {0xb1, 1, "return", jvm_interp_return}, {0xb2, 3, "getstatic", jvm_interp_getstatic}, {0xb3, 1, "putstatic", jvm_interp_putstatic}, {0xb4, 1, "getfield", jvm_interp_getfield}, {0xb5, 1, "putfield", jvm_interp_putfield}, {0xb6, 3, "invokevirtual", jvm_interp_invokevirtual}, {0xb7, 3, "invokespecial", jvm_interp_invokespecial}, {0xb8, 1, "invokestatic", jvm_interp_invokestatic}, {0xb9, 1, "invokeinterface", jvm_interp_invokeinterface}, {0xba, 1, "invokedynamic", jvm_interp_invokedynamic}, {0xbb, 1, "new", jvm_interp_new}, {0xbc, 1, "newarray", jvm_interp_newarray}, {0xbd, 1, "anewarray", jvm_interp_anewarray}, {0xbe, 1, "arraylength", jvm_interp_arraylength}, {0xbf, 1, "athrow", jvm_interp_athrow}, {0xc0, 1, "checkcast", jvm_interp_checkcast}, {0xc1, 1, "instanceof", jvm_interp_instanceof}, {0xc2, 1, "monitorenter", jvm_interp_monitorenter}, {0xc3, 1, "monitorexit", jvm_interp_monitorexit}, 
{0xc4, 1, "wide", jvm_interp_wide}, {0xc5, 1, "multianewarray", jvm_interp_multianewarray}, {0xc6, 1, "ifnull", jvm_interp_ifnull}, {0xc7, 1, "ifnonnull", jvm_interp_ifnonnull}, {0xc8, 1, "goto_w", jvm_interp_goto_w}, {0xc9, 1, "jsr_w", jvm_interp_jsr_w}, };
每个jvm指令的指令码就是数组的索引, 这样就能找到指令对应的BYTECODE结构,通过调用其回调函数, 就可以进入具体的解析过程了。 这样做的好处就是不用switch case一大堆分支了。
/*
 * Disassemble invokespecial: the opcode byte is followed by two operand
 * bytes that form a big-endian constant-pool index.
 * Prints "<symbol> #<index in hex>" and returns 0.
 */
int jvm_interp_invokespecial(u2 len, char *symbol, void *base)
{
        const u1 *code = base;
        u2 index;

        (void)len;
        index = (u2)((code[1] << 8) | code[2]);
        printf("%s #%x\n", symbol, index);

        return 0;
}

/* Disassemble aload_0: no operands, so only the mnemonic is printed. */
int jvm_interp_aload_0(u2 len, char *symbol, void *base)
{
        (void)len;
        (void)base;
        printf("%s\n", symbol);

        return 0;
}

/* Disassemble return: no operands, so only the mnemonic is printed. */
int jvm_interp_return(u2 len, char *symbol, void *base)
{
        (void)len;
        (void)base;
        printf("%s\n", symbol);

        return 0;
}

/*
 * Walk `len` bytes of bytecode starting at `base`, calling the handler
 * registered in jvm_byte_code[] for each opcode and stepping by that
 * entry's opcode_len.
 *
 * Returns 0 when the whole range was decoded, -1 when an opcode has no
 * handler or a zero length (either would crash or loop forever).
 */
int __disass_bytecode(u1 *base, u2 len)
{
        /* Wider than u2 so that offset + length cannot wrap around;
         * a u1 offset would wrap on any method longer than 255 bytes. */
        unsigned int offset = 0;

        while (offset < len) {
                u1 opcode = base[offset];
                u2 step = jvm_byte_code[opcode].opcode_len;

                if (!jvm_byte_code[opcode].func || step == 0)
                        return -1;

                jvm_byte_code[opcode].func(step, jvm_byte_code[opcode].symbol,
                                           base + offset);
                offset += step;
        }

        return 0;
}
目前这个反汇编器只能解析一小部分指令, 随着开发的深入, 会慢慢补全的, 下面是反汇编test.class的结果:
diassember bytecode: aload_0 invokespecial #1 return ----------------------------- iconst_0 istore_1 iconst_0 istore_1 iload_1 iconst_5 if_icmpge 17 getstatic #2 ldc #3 invokevirtual #4 iinc 1 1 goto 0xfff0 return
java工具集中提供了javap, 可以反汇编java指令,本来是想山寨一个javap的, 但是现在对jvm整体结构还是不清晰,数据结构还不能很好的设计出来, 但是随着对jvm的了解深入, 反汇编器会越来越成熟。
一、背景
笔者希望通过自己动手编写一个简单的jvm来了解java虚拟机内部的工作细节, 毕竟hotspot以及android的dalvik都有几十万行c代码的规模。 在前面的2篇开发笔记中, 已经实现了一个class文件解析器和一个java反汇编器, 在这基础上, java虚拟机的雏形也已经写好。它目前还没有内存管理功能, 也没有线程支持。它能解释执行的指令取决于我掌握的java语法范围, 在这之前我对java一无所知, 通过写这个jvm顺便也把java学会了。
它现在的功能如下
1、java反汇编器 山寨了javap的部分功能。 2、能解释执行如下jvm指令
iload_n, istore_n, aload_n, astore_n, iadd, isub, bipush, invokespecial, invokestatic, invokevirtual, goto, return, ireturn, if_icmpge, putfield, new, dup
源码地址 http://www.cloud-sec.org/jvm.tgz 举2个测试例子
test.java =========
// Helper class for the demo: one instance field and one instance method.
class aa {
    int a = 6;

    // Returns a + b. The parameter `a` shadows the field of the same name.
    int debug(int a, int b) {
        int sum;
        sum = a + b;
        return sum;
    }
}

// Entry point: creates an `aa` instance and calls debug(1, 2) on it.
public class test {
    public static void main(String args[]) {
        int a;
        aa bb = new aa();
        a = bb.debug(1, 2);
    }
}
test7.java
==========
// Demo of nested static calls: main -> add -> sub.
public class test7 {
    // Returns value - 1. The local `a` is assigned but never read.
    static int sub(int value) {
        int a = 1;
        return value - 1;
    }

    // Adds a and b, passes the sum to sub() and returns its result.
    static int add(int a, int b) {
        int sum = 0;
        int c;
        sum = a + b;
        c = sub(sum);
        return c;
    }

    // Calls add(1, 2); the result is stored in `ret` and not used further.
    public static void main(String args[]) {
        int a = 1, b = 2;
        int ret;
        ret = add(a, b);
        return ;
    }
}
二、JVM架构
2个核心文件:
classloader.c – 从硬盘加载class文件并解析。
interp_engine.c – bytecode解释器。
运行时数据区
————————————————————–
| 方法区(method) | 堆栈(stack) | 程序计数器(pc) |
————————————————————–
注意这里缺少了heap, native stack 因为我们现在还不支持这些功能。
每个method都有自己对应的栈帧stack frame 在class文件解析的时候就已经创建好。
/* Stack frame attached to a method and used by the interpreter. */
typedef struct jvm_stack_frame {
        u1 *local_var_table;                    /* pointer to the local variable table */
        u1 *operand_stack;                      /* pointer to the operand stack */
        u4 *method;
        u1 *return_addr;                        /* return address saved when the method calls another one */
        u4 offset;                              /* offset into the operand stack */
        /*
         * NOTE(review): the original comments on the next two fields were
         * swapped. Going by the field names they presumably mirror the
         * Code attribute's max_stack / max_locals -- confirm in the loader.
         */
        u2 max_stack;                           /* presumably the operand stack size, in slots */
        u2 max_locals;                          /* presumably the number of local variable slots */
        struct jvm_stack_frame *prev_stack;     /* previous stack frame */
}JVM_STACK_FRAME;
定义了一个叫curr_jvm_stack的全局变量 它用来保存当前解释器使用的栈帧结构 在jvm初始化的时候进行设置
/*
 * Allocate and zero the initial interpreter stack frame and reset the
 * call depth counter.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
int jvm_stack_init(void)
{
        curr_jvm_stack = malloc(sizeof(*curr_jvm_stack));
        if (!curr_jvm_stack) {
                __error("malloc failed.");
                return -1;
        }

        /* The original fill value was an empty character constant (''),
         * which does not compile; the intent is a zero fill. */
        memset(curr_jvm_stack, 0, sizeof(*curr_jvm_stack));
        jvm_stack_depth = 0;

        return 0;
}
三、实现细节
1、 虚拟机执行过程
初始化jvm_init()
从磁盘加载class文件并解析在内存建立方法区数据结构 初始化内存堆栈 初始化jvm运行环境。
解释器运行 jvm_run()
初始化程序计数器pc, 从方法区中查找main函数开始解释执行。
退出 jvm_exit()
释放所有数据结构
2、class文件加载与解析
对于每一个class文件使用CLASS数据结构表示
/*
 * In-memory representation of one loaded class file.
 * Per the accompanying article, the leading scalar fields follow the
 * class file structure described in the JVM specification.
 */
typedef struct jvm_class {
        u4 class_magic;
        u2 access_flag;
        u2 this_class;
        u2 super_class;
        u2 minor_version;
        u2 major_version;
        u2 constant_pool_count;
        u2 interfaces_count;
        u2 fileds_count;
        u2 method_count;
        char class_file[1024];                  /* name of the on-disk class file */
        struct constant_info_st *constant_info; /* constant pool strings */
        struct list_head interface_list_head;   /* head of the interface list */
        struct list_head filed_list_head;       /* head of the field list */
        struct list_head method_list_head;      /* head of the method list */
        struct list_head list;
}CLASS;
CLASS结构的前部分是按java虚拟机规范中对class文件结构的描述设置的。 class_file保存的是这个CLASS结构对应的磁盘class文件名。constant_info保存的是class文件常量池的字符串(utf8)。interface_list_head、filed_list_head、method_list_head分别是接口、字段、方法的链表头。
在解析class文件的时候 只解析了java虚拟机规范中规定的一个jvm最起码能解析的属性。 这个部分没什么好说的大家直接看源码 在对照java虚拟机规范就能看懂了。
3、解释器设计
java虚拟机规范中一共涉及了201条指令。没有使用switch case这种常用的算法。而是为每个jvm指令设计了一个数据结构
/* Handler signature shared by all opcodes in the dispatch table. */
typedef int (*interp_func)(u2 opcode_len, char *symbol, void *base);

/* One entry of the opcode dispatch table. */
typedef struct bytecode_st {
        u2 opcode;                              /* opcode value of the instruction */
        u2 opcode_len;                          /* length of the instruction */
        char symbol[OPCODE_SYMBOL_LEN];         /* mnemonic */
        interp_func func;                       /* function that interprets this instruction */
}BYTECODE;
opcode是jvm指令的机器码 opcode_len是这条jvm指令的长度symbol指令的助记符func是具体的这条指令解释函数。事先建立了一个BYTECODE数组
BYTECODE jvm_byte_code[OPCODE_LEN] = { {0x00, 1, "nop", jvm_interp_nop}, {0x01, 1, "aconst_null", jvm_interp_aconst_null}, {0x02, 1, "iconst_m1", jvm_interp_iconst_m1}, {0x03, 1, "iconst_0", jvm_interp_iconst_0}, {0x04, 1, "iconst_1", jvm_interp_iconst_1}, {0x05, 1, "iconst_2", jvm_interp_iconst_2}, {0x06, 1, "iconst_3", jvm_interp_iconst_3}, {0x07, 1, "iconst_4", jvm_interp_iconst_4}, {0x08, 1, "iconst_5", jvm_interp_iconst_5}, {0x09, 1, "lconst_0", jvm_interp_lconst_0}, {0x0a, 1, "lconst_1", jvm_interp_lconst_1}, {0x0b, 1, "fconst_0", jvm_interp_fconst_0}, ... {0xc5, 1, "multianewarray", jvm_interp_multianewarray}, {0xc6, 1, "ifnull", jvm_interp_ifnull}, {0xc7, 1, "ifnonnull", jvm_interp_ifnonnull}, {0xc8, 1, "goto_w", jvm_interp_goto_w}, {0xc9, 1, "jsr_w", jvm_interp_jsr_w}, }; int jvm_interp_invokespecial(u2 len, char *symbol, void *base) { u2 index; index = ((*(u1 *)(base + 1)) << 8) | (*(u1 *)(base + 2)); printf("%s #%xn", symbol, index); } int jvm_interp_aload_0(u2 len, char *symbol, void *base) { printf("%sn", symbol); } int jvm_interp_return(u2 len, char *symbol, void *base) { printf("%sn", symbol); }
对于一段bytecode: 0x2a, 0xb7, 0x0, 0x1, 0xb1, 手工解析如下:
0x2a代表aload_0指令 它将本地局部变量中的第一个变量压入到堆栈里。这个指令本身长度就是一个字节没有参数 因此0x2a的解析就非常简单 直接在屏幕打印出aload_0即可
printf(“%sn”, symbol);
0xb7代表invokespecial, 它用来调用超类构造方法、实例初始化方法、私有方法。它的用法如下:
invokespecial indexbyte1 indexbyte2。 indexbyte1和indexbyte2各占一个字节, 用(indexbyte1 << 8) | indexbyte2来构建一个常量池中的索引。每个jvm指令的操作码本身占用一个字节, 加上它的两个参数, invokespecial语句将占用3个字节空间。 所以它的解析算法如下:
u2 index;

/* The two bytes after the opcode form a big-endian constant-pool index. */
index = ((*(u1 *)(base + 1)) << 8) | (*(u1 *)(base + 2));
printf("%s #%x\n", symbol, index);
注意0xb7解析完后我们要跳过3个字节的地址那么就是0xb1了 它是return指令没有参数因此它的解析方法跟aload_0一样
printf(“%sn”, symbol);
用程序代码实现是
/*
 * Interpret bytecode starting from `method`.
 *
 * Installs the method's stack frame and constant pool as the current
 * execution environment, then repeatedly fetches the opcode at jvm_pc.pc
 * and calls its handler from jvm_byte_code[]. The loop ends when
 * jvm_stack_depth drops back to zero.
 *
 * Returns 0 once interpretation has finished.
 */
int interp_bytecode(CLASS_METHOD *method)
{
        u1 opcode;

        jvm_stack_depth++;                                      /* one more active method call */
        curr_jvm_stack = &method->code_attr->stack_frame;       /* switch to this method's frame */
        curr_jvm_interp_env->constant_info = method->class->constant_info;
        curr_jvm_interp_env->prev_env = NULL;

        /* A depth of zero means every method has finished. */
        while (jvm_stack_depth != 0) {
                opcode = *(u1 *)jvm_pc.pc;                      /* fetch the opcode at the pc */
                jvm_byte_code[opcode].func(jvm_byte_code[opcode].opcode_len,
                                           jvm_byte_code[opcode].symbol,
                                           jvm_pc.pc);
                /* NOTE(review): presumably paces the trace output for the demo -- confirm. */
                sleep(1);
        }

        printf("interpret bytecode done.\n");

        return 0;
}
举个例子
/*
 * Interpret iadd: pop two int operands, push their sum and advance the
 * program counter by the instruction length `len`.
 *
 * Returns 0.
 */
int jvm_interp_iadd(u2 len, char *symbol, void *base)
{
        /* Unsigned temporaries: the addition wraps instead of overflowing. */
        u4 tmp1, tmp2;

        (void)base;
        printf("%s\n", symbol);

        pop_operand_stack(int, tmp1)
        pop_operand_stack(int, tmp2)
        push_operand_stack(int, (tmp1 + tmp2))

        jvm_pc.pc += len;

        return 0;
}
jvm_interp_iadd用于解释执行iadd指令 首先从操作数栈中弹出2个int型变量tmp1, tmp2。
把tmp1 + tmp2相加后在压入到操作数栈里。
下面是test7.java的执行演示
// Demo of nested static calls: main -> add -> sub.
public class test7 {
    // Returns value - 1. The local `a` is assigned but never read.
    static int sub(int value) {
        int a = 1;
        return value - 1;
    }

    // Adds a and b, passes the sum to sub() and returns its result.
    static int add(int a, int b) {
        int sum = 0;
        int c;
        sum = a + b;
        c = sub(sum);
        return c;
    }

    // Calls add(1, 2); the result is stored in `ret` and not used further.
    public static void main(String args[]) {
        int a = 1, b = 2;
        int ret;
        ret = add(a, b);
        return ;
    }
}
ajvm是一个笔者正在开发中的java虚拟机, 用c和少量汇编语言编写, 目的在于探究一个可运行的java虚拟机是如何实现的, 目前整个jvm的source code代码量在5000行左右, 预计控制在1w行以内,只要能运行简单的java代码即可。笔者希望ajvm能变成一个教学用的简单java虚拟机实现, 帮助java程序员在陷入庞大的hotspot vm源码之前, 能对jvm的结构有个清晰的认识。 ajvm是笔者利用业余时间编写的, 每次完成一个重要功能都会以笔记的形式发布到ata, 和大家共同学习和探讨。
git repo: https://github.com/cloudsec/ajvm git clone git@github.com:cloudsec/ajvm.git
最近笔者给ajvm增加了stack calltrace的功能, 用于帮助和调试jvm crash后的信息。 大家知道oracle的hotspot jvm在crash后会给出大量的crash信息, 这些信息能帮助jvm开发人员快速定位问题。同样, ajvm也增加了类似的功能:
1、calltrace(), 打印函数调用栈。
2、截获SIGSEGV信号, jvm segfault后, 打印离堆栈指针rsp最近的16字节信息;打印cpu寄存器信息;打印函数调用栈。
首先看如何打印函数调用栈:
笔者在《理解堆栈及其利用方法 》: http://blog.aliyun.com/964?spm=0.0.0.0.BykR2E
这篇paper中详细讲述了intel x86和x86_64下进程堆栈的结构, 关于堆栈的基础知识请大家参考此paper。
下面举一个简单的例子:
#include #include "trace.h" #include "log.h" void test2() { calltrace(); *(int *)0 = 0; } void test1() { test2(); } void test() { test1(); } int main(void) { log_init(); GET_BP(top_rbp); calltrace_init(); test(); return 0; }
在test2函数中调用了calltrace()函数, 用来打印它的函数调用栈, 我们知道它的函数调用栈是这样的: main->test->test1->test2->calltrace。我们想让calltrace的输出信息类似如下:
test2 test1 test main
要完成此功能, 我们要利用gcc编译器的一个特点, 注意在-O2或-fomit-frame-pointer参数下, 这个方法就无效了。 反汇编这个程序后, 会发现每个函数调用的开头总会有这么几句汇编指令:
0000000000401138 <test2>:
  401138: 55          push %rbp
  401139: 48 89 e5    mov %rsp,%rbp
000000000040114e <test1>:
  40114e: 55          push %rbp
  40114f: 48 89 e5    mov %rsp,%rbp
000000000040115e <test>:
  40115e: 55          push %rbp
  40115f: 48 89 e5    mov %rsp,%rbp
000000000040116e <main>:
  40116e: 55          push %rbp
  40116f: 48 89 e5    mov %rsp,%rbp
大家想起来了吧, rbp在intel处理器中代表的是一个堆栈中栈帧开始的地址, rsp代表当前堆栈栈顶的地址。在c语言中一个函数的调用过程是这样的:
test() { test1(); }
在test函数中调用test1()的时候, cpu会先自动把call test1后面那条指令的地址(即返回地址)压入堆栈, 然后再执行test1开头的push rbp; mov rsp, rbp指令。 我们画一下, 从main函数到calltrace函数的整个堆栈栈帧结构:
         |...|
         |rbp|<--|   push rbp; mov rsp, rbp
ctrace-> |rip|   |   call calltrace + 1
         |...|   |
         |rbp|<--|   push rbp; mov rsp, rbp
test2->  |rip|   |   call test2 + 1
         |...|   |
         |rbp|<--|   push rbp; mov rsp, rbp
test1->  |rip|   |   call test1 + 1
         |...|   |
         |rbp|<--|   push rbp; mov rsp, rbp
test->   |rip|   |   call test + 1
         |...|   |
         |rbp|<--|   push rbp; mov rsp, rbp
main->   |rip|   |   call main + 1
         |...|   |
glibc    |...|<--|   rbp->unknown
所以在正常情况下堆栈的栈帧中每个rbp后面,保存的都是上一个函数的返回地址, calltrace的实现其实就很简单了, 首先得到rbp的地址,然后rbp后面的地址就是ret rip的地址, 通过这个地址,我们可以解析出栈帧对应的符号信息, 因为ajvm通过自己解析elf文件, 来获得符号表信息。 calltrace的大致实现如下:
void calltrace(void) { CALL_TRACE trace, prev_trace; uint64_t *rbp, rip, real_rip; int flag = 0, first_bp = 0; printf("Call trace:\n\n"); GET_BP(rbp) while (rbp != top_rbp) { rip = *(uint64_t *)(rbp + 1); rbp = (uint64_t *)*rbp; real_rip = compute_real_func_addr(rip); if (flag == 1) { if (search_symbol_by_addr(real_rip, &prev_trace) == -1) { __error("calltrace: search symbol failed."); exit(-1); } prev_trace.rip = rip - 5; prev_trace.offset = trace.rip - prev_trace.symbol_addr; show_calltrace(&prev_trace); trace = prev_trace; } else { if (search_symbol_by_addr(real_rip, &trace) == -1) { __error("calltrace: search symbol failed."); exit(-1); } trace.rip = rip - 5; flag = 1; } } printf("\n"); }
我们刚才讲ajvm还截获了进程的SIGSEGV信号处理流程, 在jvm初始化的时候,通过signal_init()来实现:
int signal_init(void) { struct sigaction sa; sa.sa_flags = SA_SIGINFO; sa.sa_sigaction = signal_handler; sigemptyset(&sa.sa_mask); if (sigaction(SIGSEGV, &sa, NULL) == -1) { perror("sigaction"); return -1; } return 0; }
当jvm crash后, signal_handler()函数接管了信号的处理流程, 注意此时整个jvm进程的堆栈结构跟calltrace结构有一点不一样:
         |...|
         |rbp|<--|   push rbp; mov rsp, rbp
do_sig-> |rip|   |   unknown
         |...|<----- segfault
         |...|
         |rbp|<--|   push rbp; mov rsp, rbp
test2->  |rip|   |   call test2 + 1
         |...|   |
         |rbp|<--|   push rbp; mov rsp, rbp
test1->  |rip|   |   call test1 + 1
         |...|   |
         |rbp|<--|   push rbp; mov rsp, rbp
test->   |rip|   |   call test + 1
         |...|   |
         |rbp|<--|   push rbp; mov rsp, rbp
main->   |rip|   |   call main + 1
         |...|   |
glibc    |...|<--|   rbp->unknown
test2并没有调用do_sig函数(即上面注册的signal_handler), 这是因为test2函数里有一个空指针引用的操作, 操作系统内核在处理这个缺页异常中断的时候, 向进程发送了SIGSEGV信号, 通常情况下, 会直接杀死进程, 但是这个信号被do_sig函数接管了, 我们要在这个函数里打印充足的调试信息后, 再退出进程。
/*
 * SIGSEGV handler: print the faulting address, a stack dump, the
 * registers and a call trace, then exit the process.
 *
 * The frame walk mirrors calltrace(). The first frame's symbol lookup is
 * allowed to fail (see the comment below), and the first printed frame
 * gets offset 0.
 *
 * NOTE(review): printf() and exit() are not async-signal-safe, and the
 * process exits with status 0 after a crash -- both kept as in the
 * original; confirm they are intended.
 */
void signal_handler(int sig_num, siginfo_t *sig_info, void *ptr)
{
        CALL_TRACE trace, prev_trace;
        uint64_t *rbp, rip, real_rip;
        int flag = 0, first_bp = 0;

        (void)sig_num;
        (void)ptr;

        assert(sig_info != NULL);

        /* si_addr is a pointer: convert it explicitly instead of passing
         * it to %x, which is undefined and truncates on 64-bit targets. */
        printf("\nPid: %d segfault at addr: 0x%016llx\tsi_signo: %d\tsi_errno: %d\n\n",
               (int)getpid(),
               (unsigned long long)(uintptr_t)sig_info->si_addr,
               sig_info->si_signo, sig_info->si_errno);

        show_stack();
        show_registers();

        printf("Call trace:\n\n");

        GET_BP(rbp);
        while (rbp != NULL && rbp != top_rbp) {
                rip = rbp[1];                   /* return address into the caller */
                rbp = (uint64_t *)rbp[0];       /* saved frame pointer of the caller */
                real_rip = compute_real_func_addr(rip);

                if (flag == 1) {
                        if (search_symbol_by_addr(real_rip, &prev_trace) == -1) {
                                __error("calltrace: search symbol failed.");
                                exit(-1);
                        }
                        prev_trace.rip = rip - 5;
                        if (first_bp == 0) {
                                first_bp = 1;
                                prev_trace.offset = 0;
                        } else {
                                prev_trace.offset = trace.rip - prev_trace.symbol_addr;
                        }
                        show_calltrace(&prev_trace);
                        trace = prev_trace;
                } else {
                        /* We are inside a signal handler: the last call frame
                         * is unknown, so its rip cannot be located and the
                         * lookup result is deliberately ignored. */
                        (void)search_symbol_by_addr(real_rip, &trace);
                        trace.rip = rip - 5;
                        flag = 1;
                }
        }

        printf("\n");
        exit(0);
}
至于show_stack()和show_registers()函数就很简单了:
/*
 * Read one x86-64 register into x. The trailing ';' is part of each macro
 * because existing call sites invoke it without one. __asm__ __volatile__
 * keeps the read from being merged or moved, and also compiles in strict
 * ISO C modes where plain `asm` is not a keyword.
 */
#define GET_BP(x)  __asm__ __volatile__("movq %%rbp, %0" : "=r"(x));
#define GET_SP(x)  __asm__ __volatile__("movq %%rsp, %0" : "=r"(x));
#define GET_AX(x)  __asm__ __volatile__("movq %%rax, %0" : "=r"(x));
#define GET_BX(x)  __asm__ __volatile__("movq %%rbx, %0" : "=r"(x));
#define GET_CX(x)  __asm__ __volatile__("movq %%rcx, %0" : "=r"(x));
#define GET_DX(x)  __asm__ __volatile__("movq %%rdx, %0" : "=r"(x));
#define GET_SI(x)  __asm__ __volatile__("movq %%rsi, %0" : "=r"(x));
#define GET_DI(x)  __asm__ __volatile__("movq %%rdi, %0" : "=r"(x));
#define GET_R8(x)  __asm__ __volatile__("movq %%r8, %0" : "=r"(x));
#define GET_R9(x)  __asm__ __volatile__("movq %%r9, %0" : "=r"(x));
#define GET_R10(x) __asm__ __volatile__("movq %%r10, %0" : "=r"(x));
#define GET_R11(x) __asm__ __volatile__("movq %%r11, %0" : "=r"(x));
#define GET_R12(x) __asm__ __volatile__("movq %%r12, %0" : "=r"(x));
#define GET_R13(x) __asm__ __volatile__("movq %%r13, %0" : "=r"(x));
#define GET_R14(x) __asm__ __volatile__("movq %%r14, %0" : "=r"(x));
#define GET_R15(x) __asm__ __volatile__("movq %%r15, %0" : "=r"(x));

/*
 * Print the current rsp and rbp and the 16 bytes stored at rsp.
 * The values are those seen inside this function.
 */
void show_stack(void)
{
        uint64_t *rsp, *rbp;
        int i;

        GET_SP(rsp);
        GET_BP(rbp);

        /* Print the full 64-bit value; %x would drop the upper half. */
        printf("Stack:\t\t\nrsp: 0x%016llx\t\trbp: 0x%016llx\n",
               (unsigned long long)(uintptr_t)rsp,
               (unsigned long long)(uintptr_t)rbp);

        for (i = 0; i < 16; i++) {
                printf("0x%02x ", *((unsigned char *)rsp + i));
        }
        printf("\n\n");
}

/*
 * Print the general-purpose registers rax..rdi and r8..r15 as they are at
 * the moment each one is read inside this function.
 */
void show_registers(void)
{
        unsigned long long rax, rbx, rcx, rdx, rsi, rdi;
        unsigned long long r8, r9, r10, r11, r12, r13, r14, r15;

        GET_AX(rax) GET_BX(rbx) GET_CX(rcx) GET_DX(rdx)
        GET_SI(rsi) GET_DI(rdi)
        /* r8 was never read or passed before, so every value after rdi was
         * printed under the wrong label and the last conversion had no
         * argument at all. */
        GET_R8(r8) GET_R9(r9) GET_R10(r10) GET_R11(r11)
        GET_R12(r12) GET_R13(r13) GET_R14(r14) GET_R15(r15)

        printf("Registers:\n");
        printf("rax = 0x%016llx, rbx = 0x%016llx, rcx = 0x%016llx, rdx = 0x%016llx\n"
               "rsi = 0x%016llx, rdi = 0x%016llx, r8 = 0x%016llx, r9 = 0x%016llx\n"
               "r10 = 0x%016llx, r11 = 0x%016llx, r12 = 0x%016llx, r13 = 0x%016llx\n"
               "r14 = 0x%016llx, r15 = 0x%016llx\n\n",
               rax, rbx, rcx, rdx,
               rsi, rdi, r8, r9,
               r10, r11, r12, r13,
               r14, r15);
}
最后演示一下ajvm在crash后的出错信息:
Pid: 8739 segfault at addr: 0x0000000000000000 si_signo: 11 si_errno: 0 Stack: rsp: 0x00000000caa88680 rbp: 0x00000000caa886a0 0x90 0x87 0xa8 0xca 0xff 0x7f 0x00 0x00 0x58 0xd3 0xe4 0x3d 0x0c 0x00 0x00 0x00 Registers: rax = 0x000000003de6c144, rbx = 0x000000003e151780, rcx = 0x0000000000000001, rdx = 0x0000000000000001 rsi = 0x000000003de6317a, rdi = 0x0000000000000000, r8 = 0x00000000caa886a0, r9 = 0x0000000000000000 r10 = 0x000000000040accf, r11 = 0x00000000caa88790, r12 = 0x000000003de4d358, r13 = 0x00000000caa88680 r14 = 0x00000000caa886a0, r15 = 0x000000000000000b Call trace: [<0x401457>] jvm_pc_init + 0x0/0x42 [<0x4015dc>] jvm_run + 0x4b/0x7d
利用这个crash信息, 可以帮助程序员快速定位ajvm的bug。
一、 前言
ajvm是笔者正在开发中的一个java虚拟机, 想通过编写这个jvm帮助程序员了解jvm的具体实现细节, 它是国内第一个开源的java虚拟机项目:https://github.com/cloudsec/ajvm, 同时笔者把它的开发笔记也分享到了ata上。 在前面4篇笔记中, 已经实现了class文件加载器, 反汇编器,jvm的crash信息处理, 同时它已经能运行简单的java代码了。 在今天的这篇笔记中, 将开始分享ajvm的内存管理模块是如何编写的。
二、内存分配
看下面一段java代码:
// Array demo used for the memory-management walkthrough.
public class test6 {
    public static void main(String args[]) {
        int[] data, data1;
        int i;
        int num = 0;

        // First array: allocated and then written element by element.
        data = new int[2];
        for (i = 0; i < 2; i++) {
            data[i] = i;
        }

        // Second array: allocated but never read or written afterwards.
        data1 = new int[3];
    }
}
首先用javac编译下, 然后用ajvm的反汇编器查看bytecode:
$./wvm -d test/test6.class Diassember bytecode:()V stack: 1 local: 1 0: aload_0 1: invokespecial #1 4: return main ([Ljava/lang/String;)V stack: 3 local: 5 0: iconst_0 1: istore 4 3: iconst_2 4: newarray 10 6: astore_1 7: iconst_0 8: istore_3 9: iload_3 10: iconst_2 11: if_icmpge 13 14: aload_1 15: iload_3 16: iload_3 17: iastore 18: iinc 3 1 21: goto 0xfffffff4 24: iconst_3 25: newarray 10 27: astore_2 28: return
源码中data = new int[2];对应的汇编指令为:
4: newarray 10
根据jvm虚拟机规范的描述, newarray指令的作用是, 从操作数堆栈中取出data数组的元素个数, 然后根据newarray后面的type计算要申请的内存大小, type的值在虚拟机规范中如下:
/* Array element type codes: the `type` operand that follows newarray. */
#define T_BOOLEAN 4
#define T_CHAR 5
#define T_FLOAT 6
#define T_DOUBLE 7
#define T_BYTE 8
#define T_SHORT 9
#define T_INT 10
#define T_LONG 11
所以10代表这个int类型的数组, 接下来就要给data这个数组从heap中分配内存了。
/*
 * Allocate storage from the slab heap for a primitive array.
 *
 * atype: element type code (T_BOOLEAN .. T_LONG).
 * count: number of elements; must not be negative.
 *
 * Returns the address returned by slab_alloc(), or NULL when atype is not
 * a known type code or count is negative.
 */
void *alloc_newarray_memroy(u1 atype, int count)
{
        size_t elem_size;

        switch (atype) {
        case T_BOOLEAN:
        case T_BYTE:
                elem_size = sizeof(char);
                break;
        case T_CHAR:    /* a Java char is 16 bits wide, not one byte */
        case T_SHORT:
                elem_size = sizeof(short);
                break;
        case T_INT:
        case T_FLOAT:
                elem_size = sizeof(int);
                break;
        case T_LONG:
        case T_DOUBLE:
                elem_size = sizeof(long long);
                break;
        default:
                error("bad atype value.\n");
                return NULL;
        }

        /* A negative count converted to size_t would request a huge block. */
        if (count < 0) {
                error("bad array count.\n");
                return NULL;
        }

        return (void *)slab_alloc(jvm_thread_mem, (size_t)count * elem_size);
}
ajvm的内存堆用的是slab算法, slab的内存结构如下:
------- ------ ------ ------ |cache|--> |slab| --> |slab| -->|slab| ------- ------ ------ ------ |cache| ----- |cache| ... ----- ------ ------ ------ |cache|--> |slab| --> |slab| -->|slab| ----- ------ ----- ------ |cache| ... ------- |cache| ------- |cache|-->|slab|-->|slab| -->|slab| ------- ------ ------ ------
源码中的slab.c是它完整的实现, 不熟悉slab的同学请自行google。
三、垃圾回收
gc是java程序员普遍关心的问题, 当内存不够时, 将会触发jvm的垃圾回收机制。
ajvm使用最原始的引用计数法, 需要建立一个新的数据结构:
/* Bookkeeping record for one heap allocation, used by the reference-counting GC. */
typedef struct jvm_object {
        int ref_count;                  /* number of recorded references */
        CLASS *class;
        void *addr;                     /* start address of the allocation */
        int size;                       /* size of the array */
        struct list_head list;          /* link in the object list */
}JVM_OBJECT;
当数组申请完内存后, 将会建立一个新的JVM_OBJECT与其对应, ref_count被初始化为0, addr指向数组的首地址, size表示数组的大小, JVM_OBJECT将会被加入到jvm_obj_list_head链表中, 在将来的垃圾回收时将会用到。
/*
 * Interpret newarray (excerpt; the parts marked "..." are omitted in the
 * article). Allocates the array storage and creates the object record
 * for it.
 *
 * Returns -1 if either the allocation or the record creation fails.
 */
int jvm_interp_newarray(u2 len, char *symbol, void *base)
{
        ...

        addr = (void *)alloc_newarray_memroy(atype, count);
        if (!addr) {
                error("slab alloc failed.\n");
                return -1;
        }
        /* %p prints the full pointer; %x is undefined for a pointer argument. */
        printf("addr: %p\n", addr);

        new_obj = create_new_obj(addr, count);
        if (!new_obj) {
                error("create new obj failed.\n");
                return -1;
        }

        ...
}
当数组被引用时, 我们根据数组的地址在JVM_OBJECT链表中找到它, 并且把ref_count加1, 表示这个数组在被引用。 比如上面的:
17: iastore
这条指令就会对data数组进行引用, 我们只要在iastore的解释代码里, 对data对应的ref_count加1即可:
/*
 * Interpret iastore: pop value, index and array address, store value at
 * addr[index], bump the array object's reference count and advance the pc.
 * In disassembly mode only the mnemonic is printed.
 *
 * Returns 0 on success, -1 if the array address is null or the reference
 * count cannot be updated.
 */
int jvm_interp_iastore(u2 len, char *symbol, void *base)
{
        int *addr, index, value;

        (void)base;

        if (jvm_arg->disass_class) {
                printf("%s\n", symbol);
                return 0;
        }

        pop_operand_stack(int, value)
        pop_operand_stack(int, index)
        pop_operand_stack(int, addr)

        printf("addr: %p\tindex: %d\t%d\n", (void *)addr, index, value);

        /* A null array reference would otherwise crash the VM on the store. */
        if (!addr) {
                jvm_error(VM_ERROR_INTERP, "iastore on null array.\n");
                return -1;
        }

        /* NOTE(review): index is not checked against the array length here. */
        addr[index] = value;

        if (inc_obj_ref(addr, (&jvm_obj_list_head)) == -1) {
                jvm_error(VM_ERROR_INTERP, "inc jvm obj ref failed.\n");
                return -1;
        }

        jvm_pc.pc += len;

        return 0;
}
对于数组data1, 同样进行了内存分配, 但是始终没有被引用到, 所以data1将会是gc回收时要释放的对象。
/*
 * Reference-counting sweep: walk the object list and free every object
 * whose ref_count is zero, unlinking it from the list first.
 */
void start_gc(struct list_head *list_head)
{
        struct list_head *pos, *next;
        JVM_OBJECT *obj;

        /* The _safe variant is needed because entries are removed while iterating. */
        list_for_each_safe(pos, next, list_head) {
                obj = list_entry(pos, JVM_OBJECT, list);
                if (obj->ref_count != 0)
                        continue;

                printf("free addr: %p\tsize: %d\tref_count: %d\n",
                       obj->addr, obj->size, obj->ref_count);
                list_del(pos);
                free_jvm_obj(obj);
        }
}
这是ajvm最简单的gc算法了, 后续将会对其进行优化。
四、演示执行
下面是ajvm对上述java代码的解释和执行过程:
$./wvm -c test test6 jvm pc init at: 0x630510 main ([Ljava/lang/String;)V stack: 3 local : 5 code: 0x3 0x36 0x4 0x5 0xbc 0xa 0x4c 0x3 0x3e 0x1d 0x5 0xa2 0x0 0xd 0x2b 0x1d 0x1d 0x4f 0x84 0x3 0x1 0xa7 0xff 0xf4 0x6 0xbc 0xa 0x4d 0xb1 #local at: 0x630540 #stack at: 0x630554 [ 1] iconst_0 pc: 0x630510 -> 0x3 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 2] istore pc: 0x630511 -> 0x36 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 3] iconst_2 pc: 0x630513 -> 0x5 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x2 0x0 0x0 [ 4] newarray pc: 0x630514 -> 0xbc #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x2 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 [ 5] astore_1 pc: 0x630516 -> 0x4c #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 6] iconst_0 pc: 0x630517 -> 0x3 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 7] istore_3 pc: 0x630518 -> 0x3e #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 8] iload_3 pc: 0x630519 -> 0x1d #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 9] iconst_2 pc: 0x63051a -> 0x5 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x2 0x0 
[ 10] if_icmpge pc: 0x63051b -> 0xa2 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x2 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 11] aload_1 pc: 0x63051e -> 0x2b #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 [ 12] iload_3 pc: 0x63051f -> 0x1d #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 [ 13] iload_3 pc: 0x630520 -> 0x1d #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 [ 14] iastore pc: 0x630521 -> 0x4f #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 15] iinc pc: 0x630522 -> 0x84 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 [ 16] goto pc: 0x630525 -> 0xa7 [ 17] iload_3 pc: 0x630519 -> 0x1d #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x0 0x0 [ 18] iconst_2 pc: 0x63051a -> 0x5 #local: 0x0 0x627c20 0x0 0x1 
0x0 #stack: 0x1 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x2 0x0 [ 19] if_icmpge pc: 0x63051b -> 0xa2 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x2 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 [ 20] aload_1 pc: 0x63051e -> 0x2b #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 [ 21] iload_3 pc: 0x63051f -> 0x1d #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 [ 22] iload_3 pc: 0x630520 -> 0x1d #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x1 [ 23] iastore pc: 0x630521 -> 0x4f #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x1 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 [ 24] iinc pc: 0x630522 -> 0x84 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 [ 25] goto pc: 0x630525 -> 0xa7 [ 26] iload_3 pc: 0x630519 -> 0x1d #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 
#stack: 0x2 0x0 0x0 [ 27] iconst_2 pc: 0x63051a -> 0x5 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x2 0x0 [ 28] if_icmpge pc: 0x63051b -> 0xa2 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x2 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 [ 29] iconst_3 pc: 0x630528 -> 0x6 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x3 0x0 0x0 [ 30] newarray pc: 0x630529 -> 0xbc #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x3 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x627c80 0x0 0x0 [ 31] astore_2 pc: 0x63052b -> 0x4d #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x627c80 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x627c80 0x2 0x0 #stack: 0x0 0x0 0x0 [ 32] return pc: 0x63052c -> 0xb1 #local: 0x0 0x627c20 0x627c80 0x2 0x0 #stack: 0x0 0x0 0x0 jvm stack depth is zero. interpret bytecode done.