了解java class文件格式是读懂asm代码的前提,所以在此之前请找Java虚拟机规范里边class文件格式章节细读一下。
不过在详解读入过程之前,先简单看下jvm规范中对class文件格式的定义。其中u表示无符号(unsigned)数,后面的数字是它占用的字节数,即u1、u2、u4分别代表1、2、4个字节的无符号数:
// Layout of a class file in the JVM specification's struct notation.
// u1/u2/u4 are unsigned quantities of 1, 2 and 4 bytes; the fields appear in
// the file in exactly this order.
ClassFile {
u4 magic; // magic number at the very start of the file
u2 minor_version; // minor version of the class file format
u2 major_version; // major version of the class file format
u2 constant_pool_count; // number of constant pool slots, plus one
cp_info constant_pool[constant_pool_count-1]; // variable-sized entries, indexed from 1
u2 access_flags; // access flags of the class
u2 this_class; // constant pool index naming this class
u2 super_class; // constant pool index naming the super class
u2 interfaces_count; // number of implemented interfaces
u2 interfaces[interfaces_count]; // one constant pool index per interface
u2 fields_count; // number of field_info entries
field_info fields[fields_count];
u2 methods_count; // number of method_info entries
method_info methods[methods_count];
u2 attributes_count; // number of class-level attribute_info entries
attribute_info attributes[attributes_count];
}
我们用一个简单的例子作为切入点:用 ClassReader读入class文件内容,构造一个ClassNode的过程:
/**
 * Demo entry point: reads the class file of {@code com.tmall.buy.FullClass},
 * replays it into a {@link ClassNode} and prints the resulting class name.
 *
 * @param args unused
 * @throws IOException declared because constructing the reader from a class
 *         name has to read the class file
 */
public static void main(String[] args) throws IOException {
    // The reader walks the class file once, front to back, and keeps almost
    // nothing itself (comparable to a streaming, SAX-style XML parser).
    final ClassReader classReader = new ClassReader("com.tmall.buy.FullClass");

    // The node stores everything it is told about the class (comparable to a
    // DOM tree for XML).
    final ClassNode classNode = new ClassNode();

    classReader.accept(classNode, ClassReader.EXPAND_FRAMES);
    System.out.println(classNode.name);
}
其中
ClassReader("com.tmall.buy.FullClass"),
会读入class文件;其实现:
readClass(ClassLoader.getSystemResourceAsStream(name.replace('.', '/') + ".class"), true)
这里并不会真正装载(define)这个类:ClassLoader.getSystemResourceAsStream只是通过system class loader(按委派模型会先询问bootstrap class loader)把.class文件当作资源定位出来并打开成输入流。读入的是class文件的字节数组,然后调用ClassReader的另外一个构造函数:
public ClassReader(final byte[] b)
-->
public ClassReader(final byte[] b, final int off, final int len)
来看下这个构造函数的实现:
/**
 * Builds a reader over the class file bytes in {@code b}, starting at
 * {@code off}.
 *
 * <p>The constant pool is not decoded here. It is walked once to record, for
 * every entry, the offset of the byte following its tag in {@code items}; the
 * size of the largest UTF8 entry in {@code maxStringLength}; and the offset
 * just past the pool in {@code header}.
 *
 * @param b   the bytes of the class file
 * @param off offset in {@code b} at which the class file starts
 * @param len not read by this constructor
 * @throws IllegalArgumentException if the major version read at
 *         {@code off + 6} is greater than {@code Opcodes.V1_8}
 */
public ClassReader(final byte[] b, final int off, final int len) {
    this.b = b;

    // major_version sits 6 bytes into the file (after magic and minor_version)
    if (readShort(off + 6) > Opcodes.V1_8) {
        throw new IllegalArgumentException();
    }

    // constant_pool_count sits 8 bytes in; both lookup tables get one slot per index
    final int poolCount = readUnsignedShort(off + 8);
    items = new int[poolCount];
    strings = new String[poolCount];

    int largestUtf8Entry = 0;
    int cursor = off + 10; // first constant pool entry
    int slot = 1;          // constant pool indices start at 1
    while (slot < poolCount) {
        // remember where the payload (the byte after the tag) of this entry
        // starts, so it can be read straight from the byte array later on
        items[slot] = cursor + 1;

        final int entrySize; // tag byte included; depends on the tag
        switch (b[cursor]) {
        case ClassWriter.FIELD:
        case ClassWriter.METH:
        case ClassWriter.IMETH:
        case ClassWriter.INT:
        case ClassWriter.FLOAT:
        case ClassWriter.NAME_TYPE:
        case ClassWriter.INDY:
            entrySize = 5;
            break;
        case ClassWriter.LONG:
        case ClassWriter.DOUBLE:
            entrySize = 9;
            ++slot; // these entries use up two constant pool indices
            break;
        case ClassWriter.UTF8:
            entrySize = 3 + readUnsignedShort(cursor + 1);
            largestUtf8Entry = Math.max(largestUtf8Entry, entrySize);
            break;
        case ClassWriter.HANDLE:
            entrySize = 4;
            break;
        default:
            // ClassWriter.CLASS, ClassWriter.STR and ClassWriter.MTYPE end up here
            entrySize = 3;
            break;
        }

        cursor += entrySize;
        ++slot;
    }

    // size of the biggest UTF8 entry seen; used later to size the string buffer
    maxStringLength = largestUtf8Entry;
    // the class header information starts just after the constant pool
    header = cursor;
}
这里并没有把常量池解析成对象,只是用items数组记录了每个常量池项内容在class文件字节数组中的起始位置,并用header记录了常量池结束的位置。也就是说,header之前的内容包括魔数、主次版本号、常量池数量和整个常量池;从header开始,接下来就是access_flags、this_class、super_class了。
ClassReader.accept(final ClassVisitor classVisitor, final int flags)调用了同名不同参的accept方法:
/**
 * Makes {@code classVisitor} visit the class held by this reader.
 *
 * <p>Events are issued in this order: class declaration, source and debug
 * info, outer class, annotations and type annotations, non-standard
 * attributes, inner classes, fields, methods, and finally {@code visitEnd()}.
 *
 * @param classVisitor the visitor that receives the class content
 * @param attrs        prototypes passed on to {@code readAttribute} for the
 *                     attributes this method does not handle itself
 * @param flags        parsing options; stored in the context and tested here
 *                     against {@code SKIP_DEBUG}
 */
public void accept(final ClassVisitor classVisitor,
        final Attribute[] attrs, final int flags) {
    int u = header; // current offset in the class file
    // buffer used to read strings, sized from the maxStringLength computed
    // by the constructor
    char[] c = new char[maxStringLength];

    Context context = new Context();
    context.attrs = attrs;
    context.flags = flags;
    context.buffer = c;

    // reads the class declaration
    int access = readUnsignedShort(u); // access flags of the class
    String name = readClass(u + 2, c); // class name, resolved through the constant pool
    String superClass = readClass(u + 4, c); // super class name
    String[] interfaces = new String[readUnsignedShort(u + 6)]; // interfaces_count
    u += 8;
    for (int i = 0; i < interfaces.length; ++i) {
        interfaces[i] = readClass(u, c); // interface name
        u += 2;
    }

    // reads the class attributes
    String signature = null;
    String sourceFile = null;
    String sourceDebug = null;
    String enclosingOwner = null;
    String enclosingName = null;
    String enclosingDesc = null;
    int anns = 0;
    int ianns = 0;
    int tanns = 0;
    int itanns = 0;
    int innerClasses = 0;
    Attribute attributes = null;

    // offset of the class-level attributes_count; getAttributes() skips over
    // the fields and methods to find it
    u = getAttributes();
    for (int i = readUnsignedShort(u); i > 0; --i) {
        String attrName = readUTF8(u + 2, c); // attribute name from the constant pool
        // tests are sorted in decreasing frequency order
        // (based on frequencies observed on typical classes)
        if ("SourceFile".equals(attrName)) {
            sourceFile = readUTF8(u + 8, c); // name of the source file
        } else if ("InnerClasses".equals(attrName)) {
            innerClasses = u + 8; // offset of the inner class count
        } else if ("EnclosingMethod".equals(attrName)) {
            enclosingOwner = readClass(u + 8, c);
            int item = readUnsignedShort(u + 10);
            if (item != 0) {
                enclosingName = readUTF8(items[item], c);
                enclosingDesc = readUTF8(items[item] + 2, c);
            }
        } else if (SIGNATURES && "Signature".equals(attrName)) {
            // generic signature of the class
            signature = readUTF8(u + 8, c);
        } else if (ANNOTATIONS
                && "RuntimeVisibleAnnotations".equals(attrName)) {
            anns = u + 8;
        } else if (ANNOTATIONS
                && "RuntimeVisibleTypeAnnotations".equals(attrName)) {
            tanns = u + 8;
        } else if ("Deprecated".equals(attrName)) {
            access |= Opcodes.ACC_DEPRECATED;
        } else if ("Synthetic".equals(attrName)) {
            access |= Opcodes.ACC_SYNTHETIC
                    | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE;
        } else if ("SourceDebugExtension".equals(attrName)) {
            // extended debugging information; it carries no meaning for the JVM
            int len = readInt(u + 4);
            sourceDebug = readUTF(u + 8, len, new char[len]);
        } else if (ANNOTATIONS
                && "RuntimeInvisibleAnnotations".equals(attrName)) {
            ianns = u + 8;
        } else if (ANNOTATIONS
                && "RuntimeInvisibleTypeAnnotations".equals(attrName)) {
            itanns = u + 8;
        } else if ("BootstrapMethods".equals(attrName)) {
            int[] bootstrapMethods = new int[readUnsignedShort(u + 8)];
            for (int j = 0, v = u + 10; j < bootstrapMethods.length; j++) {
                bootstrapMethods[j] = v;
                // each entry is two u2 fields followed by as many u2 values
                // as the second field says; '+' binds tighter than '<<', the
                // parentheses only make that explicit
                v += (2 + readUnsignedShort(v + 2)) << 1;
            }
            context.bootstrapMethods = bootstrapMethods;
        } else {
            // any other attribute is handed to readAttribute and, if that
            // yields one, pushed onto the front of a linked list
            Attribute attr = readAttribute(attrs, attrName, u + 8,
                    readInt(u + 4), c, -1, null);
            if (attr != null) {
                attr.next = attributes;
                attributes = attr;
            }
        }
        // 6 = attribute_name_index (u2) + attribute_length (u4)
        u += 6 + readInt(u + 4);
    }

    // visits the class declaration; in the article's example classVisitor is
    // a ClassNode, whose visit method stores the arguments it is given.
    // items[1] is the constant pool start plus one, so items[1] - 7 is the
    // offset of the minor/major version pair, read here as one int.
    classVisitor.visit(readInt(items[1] - 7), access, name, signature,
            superClass, interfaces);

    // visits the source and debug info
    if ((flags & SKIP_DEBUG) == 0
            && (sourceFile != null || sourceDebug != null)) {
        classVisitor.visitSource(sourceFile, sourceDebug);
    }

    // visits the outer class
    if (enclosingOwner != null) {
        classVisitor.visitOuterClass(enclosingOwner, enclosingName,
                enclosingDesc);
    }

    // visits the class annotations and type annotations
    if (ANNOTATIONS && anns != 0) { // runtime visible annotations
        for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) {
            v = readAnnotationValues(v + 2, c, true,
                    classVisitor.visitAnnotation(readUTF8(v, c), true));
        }
    }
    if (ANNOTATIONS && ianns != 0) { // runtime invisible annotations
        for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) {
            v = readAnnotationValues(v + 2, c, true,
                    classVisitor.visitAnnotation(readUTF8(v, c), false));
        }
    }
    if (ANNOTATIONS && tanns != 0) { // runtime visible type annotations
        for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) {
            v = readAnnotationTarget(context, v);
            v = readAnnotationValues(v + 2, c, true,
                    classVisitor.visitTypeAnnotation(context.target,
                            context.path, readUTF8(v, c), true));
        }
    }
    if (ANNOTATIONS && itanns != 0) { // runtime invisible type annotations
        for (int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) {
            v = readAnnotationTarget(context, v);
            v = readAnnotationValues(v + 2, c, true,
                    classVisitor.visitTypeAnnotation(context.target,
                            context.path, readUTF8(v, c), false));
        }
    }

    // visits the attributes (the non-standard ones collected above); each
    // one is unlinked from the list before it is handed to the visitor
    while (attributes != null) {
        Attribute attr = attributes.next;
        attributes.next = null;
        classVisitor.visitAttribute(attributes);
        attributes = attr;
    }

    // visits the inner classes
    if (innerClasses != 0) {
        int v = innerClasses + 2;
        for (int i = readUnsignedShort(innerClasses); i > 0; --i) {
            classVisitor.visitInnerClass(readClass(v, c),
                    readClass(v + 2, c), readUTF8(v + 4, c),
                    readUnsignedShort(v + 6));
            v += 8; // each inner class entry is four u2 values
        }
    }

    // visits the fields and methods; u is placed just past fields_count
    // (8 header bytes + the interface table + the 2 bytes of fields_count)
    u = header + 10 + 2 * interfaces.length;
    for (int i = readUnsignedShort(u - 2); i > 0; --i) { // fields_count
        u = readField(classVisitor, context, u);
    }
    u += 2; // steps over methods_count
    for (int i = readUnsignedShort(u - 2); i > 0; --i) { // methods_count
        u = readMethod(classVisitor, context, u);
    }

    // visits the end of the class
    classVisitor.visitEnd();
}
来看跳过字段和方法,拿到属性开始位置的实现:
/**
 * Finds where the class-level attributes begin by stepping over the class
 * header, the interface table, every field and every method.
 *
 * @return the offset of the class-level {@code attributes_count}
 */
private int getAttributes() {
    // header points just past the constant pool. The next 8 bytes are
    // access_flags, this_class, super_class and interfaces_count (u2 each);
    // interfaces_count is read at header + 6 and each interface takes 2 bytes.
    final int interfaceCount = readUnsignedShort(header + 6);
    int offset = header + 8 + interfaceCount * 2;

    // offset is now at fields_count
    final int fieldCount = readUnsignedShort(offset);
    for (int field = 0; field < fieldCount; field++) {
        // While inside this loop offset stays 2 bytes behind the current
        // field, because fields_count is only stepped over after the loop.
        // So offset + 8 is the field's attributes_count (2 for fields_count
        // + access_flags, name_index, descriptor_index) and offset + 12 is
        // the attribute_length of the next attribute to skip.
        final int attributeCount = readUnsignedShort(offset + 8);
        for (int attribute = 0; attribute < attributeCount; attribute++) {
            // 6 = attribute_name_index (u2) + attribute_length (u4)
            offset += 6 + readInt(offset + 12);
        }
        // the field's fixed part: access_flags, name_index,
        // descriptor_index and attributes_count (u2 each)
        offset += 8;
    }
    offset += 2; // fields_count itself

    // methods have the same layout as fields, so they are skipped the same way
    final int methodCount = readUnsignedShort(offset);
    for (int method = 0; method < methodCount; method++) {
        final int attributeCount = readUnsignedShort(offset + 8);
        for (int attribute = 0; attribute < attributeCount; attribute++) {
            offset += 6 + readInt(offset + 12);
        }
        offset += 8;
    }

    // the attribute_info structure starts just after the methods;
    // the final 2 bytes are methods_count itself
    return offset + 2;
}
本例中传给ClassReader.accept方法的classVisitor参数是ClassNode对象,下面根据ClassReader.accept方法中ClassVisitor方法被调用的顺序,来一个个看下ClassNode里对应的实现:
/**
 * Stores the class declaration: version, access flags, class name, generic
 * signature, super class name and implemented interfaces.
 *
 * @param version    the class version
 * @param access     the access flags of the class
 * @param name       the name of the class
 * @param signature  the generic signature of the class
 * @param superName  the name of the super class
 * @param interfaces the names of the implemented interfaces; when
 *                   {@code null} nothing is added to the interface list
 */
public void visit(final int version, final int access, final String name,
        final String signature, final String superName,
        final String[] interfaces) {
    this.version = version;
    this.access = access;
    this.name = name;
    this.signature = signature;
    this.superName = superName;

    if (interfaces == null) {
        return;
    }
    this.interfaces.addAll(Arrays.asList(interfaces));
}
/**
 * Stores the source file and source debug information of the class.
 *
 * @param file  value kept as {@code sourceFile}
 * @param debug value kept as {@code sourceDebug}
 */
public void visitSource(final String file, final String debug) {
    this.sourceFile = file;
    this.sourceDebug = debug;
}
……其他方法比较简单,忽略。来看看readField的实现:
/**
 * Reads one field starting at offset u and makes classVisitor visit it:
 * the declaration first, then its annotations and type annotations, its
 * non-standard attributes, and finally visitEnd().
 *
 * @param classVisitor the visitor that receives the field
 * @param context      supplies the shared string buffer and the attribute
 *                     prototypes
 * @param u            offset at which the field's access flags start
 * @return the offset just past this field and its attributes
 */
private int readField(final ClassVisitor classVisitor,
final Context context, int u) {
// reads the field declaration
char[] c = context.buffer;
int access = readUnsignedShort(u);// access flags of the field
String name = readUTF8(u + 2, c);// field name
String desc = readUTF8(u + 4, c);// field descriptor
u += 6;
// reads the field attributes
String signature = null;
int anns = 0;
int ianns = 0;
int tanns = 0;
int itanns = 0;
Object value = null;
Attribute attributes = null;
for (int i = readUnsignedShort(u); i > 0; --i) {// loops over the field's attributes
String attrName = readUTF8(u + 2, c);// attribute name
// tests are sorted in decreasing frequency order
// (based on frequencies observed on typical classes)
if ("ConstantValue".equals(attrName)) {
int item = readUnsignedShort(u + 8);
value = item == 0 ? null : readConst(item, c);
} else if (SIGNATURES && "Signature".equals(attrName)) {
signature = readUTF8(u + 8, c);
} else if ("Deprecated".equals(attrName)) {
access |= Opcodes.ACC_DEPRECATED;
} else if ("Synthetic".equals(attrName)) {
access |= Opcodes.ACC_SYNTHETIC
| ClassWriter.ACC_SYNTHETIC_ATTRIBUTE;
} else if (ANNOTATIONS
&& "RuntimeVisibleAnnotations".equals(attrName)) {
anns = u + 8;
} else if (ANNOTATIONS
&& "RuntimeVisibleTypeAnnotations".equals(attrName)) {
tanns = u + 8;
} else if (ANNOTATIONS
&& "RuntimeInvisibleAnnotations".equals(attrName)) {
ianns = u + 8;
} else if (ANNOTATIONS
&& "RuntimeInvisibleTypeAnnotations".equals(attrName)) {
itanns = u + 8;
} else {// non-standard attribute: collected in a linked list, newest first
Attribute attr = readAttribute(context.attrs, attrName, u + 8,
readInt(u + 4), c, -1, null);
if (attr != null) {
attr.next = attributes;
attributes = attr;
}
}
u += 6 + readInt(u + 4);// 6 = attribute_name_index (u2) + attribute_length (u4); attributes_count (u2) is not counted here
}
u += 2;// attributes_count (u2), skipped once after the loop
// visits the field declaration
FieldVisitor fv = classVisitor.visitField(access, name, desc,
signature, value);// in the article's example this adds a FieldNode to the ClassNode; its attributes are attached below
// a null visitor means there is nothing more to report for this field
if (fv == null) {
return u;
}
// visits the field annotations and type annotations
if (ANNOTATIONS && anns != 0) {
for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) {
v = readAnnotationValues(v + 2, c, true,
fv.visitAnnotation(readUTF8(v, c), true));
}
}
if (ANNOTATIONS && ianns != 0) {
for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) {
v = readAnnotationValues(v + 2, c, true,
fv.visitAnnotation(readUTF8(v, c), false));
}
}
if (ANNOTATIONS && tanns != 0) {
for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) {
v = readAnnotationTarget(context, v);
v = readAnnotationValues(v + 2, c, true,
fv.visitTypeAnnotation(context.target, context.path,
readUTF8(v, c), true));
}
}
if (ANNOTATIONS && itanns != 0) {
for (int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) {
v = readAnnotationTarget(context, v);
v = readAnnotationValues(v + 2, c, true,
fv.visitTypeAnnotation(context.target, context.path,
readUTF8(v, c), false));
}
}
// visits the field attributes
// (each one is unlinked from the list before it is handed to the visitor)
while (attributes != null) {
Attribute attr = attributes.next;
attributes.next = null;
fv.visitAttribute(attributes);
attributes = attr;
}
// visits the end of the field
fv.visitEnd();
return u;
}
readMethod实现,跟readField很像,读取方法访问标示、方法名、描述符、属性等,最重要的在最后readCode,读取方法体字节码内容:
/**
 * Reads one method starting at offset u and makes classVisitor visit it:
 * the declaration, then its annotations, its non-standard attributes and,
 * when a Code attribute was recorded, its code via readCode.
 *
 * @param classVisitor the visitor that receives the method
 * @param context      shared parsing state; this method overwrites its
 *                     access, name and desc fields
 * @param u            offset at which the method's access flags start
 * @return the offset just past this method and its attributes
 */
private int readMethod(final ClassVisitor classVisitor,
final Context context, int u) {
// reads the method declaration
char[] c = context.buffer;
// the same context object is reused for every method: each call stores the
// current method's access, name and desc in it for later use, and the next
// call overwrites them
context.access = readUnsignedShort(u);// access flags of the method
context.name = readUTF8(u + 2, c);// method name
context.desc = readUTF8(u + 4, c);// method descriptor
u += 6;
// reads the method attributes
int code = 0;
int exception = 0;
String[] exceptions = null;
String signature = null;
int anns = 0;
int ianns = 0;
int tanns = 0;
int itanns = 0;
int dann = 0;
int mpanns = 0;
int impanns = 0;
// offset of the method's attributes_count, kept for the copy shortcut below
int firstAttribute = u;
Attribute attributes = null;
for (int i = readUnsignedShort(u); i > 0; --i) {
String attrName = readUTF8(u + 2, c);
// tests are sorted in decreasing frequency order
// (based on frequencies observed on typical classes)
if ("Code".equals(attrName)) {
// the code offset is only recorded when SKIP_CODE is not set
if ((context.flags & SKIP_CODE) == 0) {
code = u + 8;
}
} else if ("Exceptions".equals(attrName)) {
exceptions = new String[readUnsignedShort(u + 8)];
exception = u + 10;
// after this loop, exception points just past the last exception index
for (int j = 0; j < exceptions.length; ++j) {
exceptions[j] = readClass(exception, c);
exception += 2;
}
} else if (SIGNATURES && "Signature".equals(attrName)) {
signature = readUTF8(u + 8, c);
} else if ("Deprecated".equals(attrName)) {
context.access |= Opcodes.ACC_DEPRECATED;
} else if (ANNOTATIONS
&& "RuntimeVisibleAnnotations".equals(attrName)) {
anns = u + 8;
} else if (ANNOTATIONS
&& "RuntimeVisibleTypeAnnotations".equals(attrName)) {
tanns = u + 8;
} else if (ANNOTATIONS && "AnnotationDefault".equals(attrName)) {
dann = u + 8;
} else if ("Synthetic".equals(attrName)) {
context.access |= Opcodes.ACC_SYNTHETIC
| ClassWriter.ACC_SYNTHETIC_ATTRIBUTE;
} else if (ANNOTATIONS
&& "RuntimeInvisibleAnnotations".equals(attrName)) {
ianns = u + 8;
} else if (ANNOTATIONS
&& "RuntimeInvisibleTypeAnnotations".equals(attrName)) {
itanns = u + 8;
} else if (ANNOTATIONS
&& "RuntimeVisibleParameterAnnotations".equals(attrName)) {
mpanns = u + 8;
} else if (ANNOTATIONS
&& "RuntimeInvisibleParameterAnnotations".equals(attrName)) {
impanns = u + 8;
} else {
// any other attribute is collected in a linked list, newest first
Attribute attr = readAttribute(context.attrs, attrName, u + 8,
readInt(u + 4), c, -1, null);
if (attr != null) {
attr.next = attributes;
attributes = attr;
}
}
// 6 = attribute_name_index (u2) + attribute_length (u4)
u += 6 + readInt(u + 4);
}
// attributes_count (u2), skipped once after the loop
u += 2;
// visits the method declaration
MethodVisitor mv = classVisitor.visitMethod(context.access,
context.name, context.desc, signature, exceptions);// in the article's example the ClassNode creates and records a MethodNode here
// a null visitor means there is nothing more to report for this method
if (mv == null) {
return u;
}
/*
* if the returned MethodVisitor is in fact a MethodWriter, it means
* there is no method adapter between the reader and the writer. If, in
* addition, the writer's constant pool was copied from this reader
* (mw.cw.cr == this), and the signature and exceptions of the method
* have not been changed, then it is possible to skip all visit events
* and just copy the original code of the method to the writer (the
* access, name and descriptor can have been changed, this is not
* important since they are not copied as is from the reader).
*/
if (WRITER && mv instanceof MethodWriter) {
MethodWriter mw = (MethodWriter) mv;
if (mw.cw.cr == this && signature == mw.signature) {
boolean sameExceptions = false;
if (exceptions == null) {
sameExceptions = mw.exceptionCount == 0;
} else if (exceptions.length == mw.exceptionCount) {
sameExceptions = true;
// walks the exception indices backwards, from the last one to the first
for (int j = exceptions.length - 1; j >= 0; --j) {
exception -= 2;
if (mw.exceptions[j] != readUnsignedShort(exception)) {
sameExceptions = false;
break;
}
}
}
if (sameExceptions) {
/*
* we do not copy directly the code into MethodWriter to
* save a byte array copy operation. The real copy will be
* done in ClassWriter.toByteArray().
*/
mw.classReaderOffset = firstAttribute;
mw.classReaderLength = u - firstAttribute;
return u;
}
}
}
// visits the method annotations
if (ANNOTATIONS && dann != 0) {
AnnotationVisitor dv = mv.visitAnnotationDefault();
readAnnotationValue(dann, c, null, dv);
if (dv != null) {
dv.visitEnd();
}
}
if (ANNOTATIONS && anns != 0) {
for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) {
v = readAnnotationValues(v + 2, c, true,
mv.visitAnnotation(readUTF8(v, c), true));
}
}
if (ANNOTATIONS && ianns != 0) {
for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) {
v = readAnnotationValues(v + 2, c, true,
mv.visitAnnotation(readUTF8(v, c), false));
}
}
if (ANNOTATIONS && tanns != 0) {
for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) {
v = readAnnotationTarget(context, v);
v = readAnnotationValues(v + 2, c, true,
mv.visitTypeAnnotation(context.target, context.path,
readUTF8(v, c), true));
}
}
if (ANNOTATIONS && itanns != 0) {
for (int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) {
v = readAnnotationTarget(context, v);
v = readAnnotationValues(v + 2, c, true,
mv.visitTypeAnnotation(context.target, context.path,
readUTF8(v, c), false));
}
}
if (ANNOTATIONS && mpanns != 0) {
readParameterAnnotations(mv, context, mpanns, true);
}
if (ANNOTATIONS && impanns != 0) {
readParameterAnnotations(mv, context, impanns, false);
}
// visits the method attributes
// (each one is unlinked from the list before it is handed to the visitor)
while (attributes != null) {
Attribute attr = attributes.next;
attributes.next = null;
mv.visitAttribute(attributes);
attributes = attr;
}
// visits the method code
if (code != 0) {
mv.visitCode();
readCode(mv, context, code);// decodes the bytecode of the method body; the article calls this the most important step
}
// visits the end of the method
mv.visitEnd();
return u;
}
MethodNode会记录方法相关的信息,同时会初始化存储local variables、tryCatch、exception、instruction的数据结构
/**
 * Creates a method node that stores the method's declaration and sets up
 * the containers for its exceptions, try/catch blocks, instructions and,
 * for non-abstract methods only, its local variables.
 *
 * @param api        passed to the super constructor
 * @param access     the access flags of the method
 * @param name       the name of the method
 * @param desc       the descriptor of the method
 * @param signature  the generic signature of the method
 * @param exceptions the names of the exceptions the method declares; when
 *                   {@code null} the exception list stays empty
 */
public MethodNode(final int api, final int access, final String name,
        final String desc, final String signature, final String[] exceptions) {
    super(api);
    this.access = access;
    this.name = name;
    this.desc = desc;
    this.signature = signature;

    // declared exceptions: presized list, filled only when some were given
    final int exceptionCount = (exceptions != null) ? exceptions.length : 0;
    this.exceptions = new ArrayList<String>(exceptionCount);
    if (exceptions != null) {
        this.exceptions.addAll(Arrays.asList(exceptions));
    }

    // the local variable list is only created when ACC_ABSTRACT is not set
    if ((access & Opcodes.ACC_ABSTRACT) == 0) {
        this.localVariables = new ArrayList<LocalVariableNode>(5);
    }

    this.tryCatchBlocks = new ArrayList<TryCatchBlockNode>();
    this.instructions = new InsnList();
}
最最重要的readCode的实现我们下篇分析。
《未完待续》