3. TableGen生成的代码
3.1. 概述
在编译LLVM时,首先会调用TableGen解析TD文件,产生C++源代码(.inc文件),然后这些C++源代码与LLVM的其他源代码一起被编译为LLVM的库和可执行文件。需要为某个目标生成哪些文件,由llvm/lib/Target/<目标名>目录下的CMakeLists.txt文件指定。比如X86目标的CMakeLists.txt中有如下tablegen调用:
tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM X86GenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info)
tablegen(LLVM X86GenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
tablegen(LLVM X86GenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank) <-- v7.0增加
tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
括号中第一项LLVM是工程名,第二项是输出的文件,其后各项是传给tablegen的命令行选项。
TableGen本身是一个进程,因此它的入口是一个main()函数(TableGen.cpp)。
174 int main(int argc, char **argv) {
175 sys::PrintStackTraceOnErrorSignal();
176 PrettyStackTraceProgram X(argc, argv);
177 cl::ParseCommandLineOptions(argc, argv);
178
179 return TableGenMain(argv[0], &LLVMTableGenMain);
180 }
177行的cl::ParseCommandLineOptions()是LLVM里的通用命令行选项解析方法。它所支持的命令行选项是可以动态指定的。因此,它适用于一切需要解析命令行选项的地方。这里,我们不深入其细节。
在TableGenMain()定义中,OutputFilename,DependFilename,InputFilename以及IncludeDirs也都是命令行选项对象(它们声明在lib/TableGen目录下的Main.cpp中,与TableGen.cpp中声明的命令行选项一起构成TableGen可用的命令行选项,并由177行的ParseCommandLineOptions()完成解析。LLVM的代码有许多这样“神奇的”代码。简而言之,这里利用了全局对象的C++构造函数在TableGen启动时(进入main()之前)将这些命令行选项对象注册到解析器中)。
73 int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
74 RecordKeeper Records;
75
76 // Parse the input file.
77 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
78 MemoryBuffer::getFileOrSTDIN(InputFilename);
79 if (std::error_code EC = FileOrErr.getError()) {
80 errs() << "Could not open input file '" << InputFilename
81 << "': " << EC.message() << "\n";
82 return 1;
83 }
84
85 // Tell SrcMgr about this buffer, which is what TGParser will pick up.
86 SrcMgr.AddNewSourceBuffer(std::move(*FileOrErr), SMLoc());
87
88 // Record the location of the include directory so that the lexer can find
89 // it later.
90 SrcMgr.setIncludeDirs(IncludeDirs);
91
92 TGParser Parser(SrcMgr, Records);
93
94 if (Parser.ParseFile())
95 return 1;
96
97 std::error_code EC;
98 tool_output_file Out(OutputFilename, EC, sys::fs::F_Text);
99 if (EC) {
100 errs() << argv0 << ": error opening " << OutputFilename << ":"
101 << EC.message() << "\n";
102 return 1;
103 }
104 if (!DependFilename.empty()) {
105 if (int Ret = createDependencyFile(Parser, argv0))
106 return Ret;
107 }
108
109 if (MainFn(Out.os(), Records))
110 return 1;
111
112 if (ErrorsPrinted > 0) {
113 errs() << argv0 << ": " << ErrorsPrinted << " errors.\n";
114 return 1;
115 }
116
117 // Declare success.
118 Out.keep();
119 return 0;
120 }
92行的TGParser实例就是TD描述文件的解析器,94行调用其ParseFile()对指定的TD文件进行解析。注意在92行传入的Records,它是一个RecordKeeper实例(74行)。在TD文件中所有的class、def都将解析为LLVM的Record对象,并记录在这个RecordKeeper实例中。109行的MainFn在前面被绑定到LLVMTableGenMain(),它将根据TD文件解析的结果产生LLVM源代码。它是我们学习的重点。
94 bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
95 switch (Action) {
96 case PrintRecords:
97 OS << Records; // No argument, dump all contents
98 break;
case DumpJSON: <-- v7.0增加
EmitJSON(Records, OS);
break;
99 case GenEmitter:
100 EmitCodeEmitter(Records, OS);
101 break;
102 case GenRegisterInfo:
103 EmitRegisterInfo(Records, OS);
104 break;
105 case GenInstrInfo:
106 EmitInstrInfo(Records, OS);
107 break;
case GenInstrDocs: <-- v7.0增加
EmitInstrDocs(Records, OS);
break;
108 case GenCallingConv:
109 EmitCallingConv(Records, OS);
110 break;
111 case GenAsmWriter:
112 EmitAsmWriter(Records, OS);
113 break;
114 case GenAsmMatcher:
115 EmitAsmMatcher(Records, OS);
116 break;
117 case GenDisassembler:
118 EmitDisassembler(Records, OS);
119 break;
120 case GenPseudoLowering:
121 EmitPseudoLowering(Records, OS);
122 break;
case GenCompressInst: <-- v7.0增加
EmitCompressInst(Records, OS);
break;
123 case GenDAGISel:
124 EmitDAGISel(Records, OS);
125 break;
126 case GenDFAPacketizer:
127 EmitDFAPacketizer(Records, OS);
128 break;
129 case GenFastISel:
130 EmitFastISel(Records, OS);
131 break;
132 case GenSubtarget:
133 EmitSubtarget(Records, OS);
134 break;
135 case GenIntrinsic: <-- v7.0删除
136 EmitIntrinsics(Records, OS);
137 break;
138 case GenTgtIntrinsic:
139 EmitIntrinsics(Records, OS, true);
140 break;
case GenIntrinsicEnums: <-- v7.0增加
EmitIntrinsicEnums(Records, OS);
break;
case GenIntrinsicImpl:
EmitIntrinsicImpl(Records, OS);
break;
case GenTgtIntrinsicEnums:
EmitIntrinsicEnums(Records, OS, true);
break;
case GenTgtIntrinsicImpl:
EmitIntrinsicImpl(Records, OS, true);
break;
141 case GenOptParserDefs:
142 EmitOptParser(Records, OS);
143 break;
144 case PrintEnums:
145 {
146 for (Record *Rec : Records.getAllDerivedDefinitions(Class))
147 OS << Rec->getName() << ", ";
148 OS << "\n";
149 break;
150 }
151 case PrintSets:
152 {
153 SetTheory Sets;
154 Sets.addFieldExpander("Set", "Elements");
155 for (Record *Rec : Records.getAllDerivedDefinitions("Set")) {
156 OS << Rec->getName() << " = [";
157 const std::vector<Record*> *Elts = Sets.expand(Rec);
158 assert(Elts && "Couldn't expand Set instance");
159 for (Record *Elt : *Elts)
160 OS << ' ' << Elt->getName();
161 OS << " ]\n";
162 }
163 break;
164 }
165 case GenCTags:
166 EmitCTags(Records, OS);
167 break;
case GenAttributes: <-- v7.0增加
EmitAttributes(Records, OS);
break;
case GenSearchableTables:
EmitSearchableTables(Records, OS);
break;
case GenGlobalISel:
EmitGlobalISel(Records, OS);
break;
case GenRegisterBank:
EmitRegisterBank(Records, OS);
break;
case GenX86EVEX2VEXTables:
EmitX86EVEX2VEXTables(Records, OS);
break;
case GenX86FoldTables:
EmitX86FoldTables(Records, OS);
break;
168 }
169
170 return false;
171 }
95行的Action就是在TableGen.cpp中声明的命令行选项对象,它长成这个样子:
48 cl::opt<ActionType>
49 Action(cl::desc("Action to perform:"),
50 cl::values(clEnumValN(PrintRecords, "print-records",
51 "Print all records to stdout (default)"),
clEnumValN(DumpJSON, "dump-json", <-- v7.0增加
"Dump all records as machine-readable JSON"),
52 clEnumValN(GenEmitter, "gen-emitter",
53 "Generate machine code emitter"),
54 clEnumValN(GenRegisterInfo, "gen-register-info",
55 "Generate registers and register classes info"),
56 clEnumValN(GenInstrInfo, "gen-instr-info",
57 "Generate instruction descriptions"),
clEnumValN(GenInstrDocs, "gen-instr-docs", <-- v7.0增加
"Generate instruction documentation"),
58 clEnumValN(GenCallingConv, "gen-callingconv",
59 "Generate calling convention descriptions"),
60 clEnumValN(GenAsmWriter, "gen-asm-writer",
61 "Generate assembly writer"),
62 clEnumValN(GenDisassembler, "gen-disassembler",
63 "Generate disassembler"),
64 clEnumValN(GenPseudoLowering, "gen-pseudo-lowering",
65 "Generate pseudo instruction lowering"),
clEnumValN(GenCompressInst, "gen-compress-inst-emitter", <-- v7.0增加
"Generate RISCV compressed instructions."),
66 clEnumValN(GenAsmMatcher, "gen-asm-matcher",
67 "Generate assembly instruction matcher"),
68 clEnumValN(GenDAGISel, "gen-dag-isel",
69 "Generate a DAG instruction selector"),
70 clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer",
71 "Generate DFA Packetizer for VLIW targets"),
72 clEnumValN(GenFastISel, "gen-fast-isel",
73 "Generate a \"fast\" instruction selector"),
74 clEnumValN(GenSubtarget, "gen-subtarget",
75 "Generate subtarget enumerations"),
76 clEnumValN(GenIntrinsic, "gen-intrinsic", <-- v7.0删除
77 "Generate intrinsic information"),
78 clEnumValN(GenTgtIntrinsic, "gen-tgt-intrinsic",
79 "Generate target intrinsic information"),
clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums", <-- v7.0增加
"Generate intrinsic enums"),
clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl",
"Generate intrinsic information"),
clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums",
"Generate target intrinsic enums"),
clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl",
"Generate target intrinsic information"),
80 clEnumValN(PrintEnums, "print-enums",
81 "Print enum values for a class"),
82 clEnumValN(PrintSets, "print-sets",
83 "Print expanded sets for testing DAG exprs"),
84 clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
85 "Generate option definitions"),
86 clEnumValN(GenCTags, "gen-ctags",
87 "Generate ctags-compatible index"),
clEnumValN(GenAttributes, "gen-attrs", <-- v7.0增加
"Generate attributes"),
clEnumValN(GenSearchableTables, "gen-searchable-tables",
"Generate generic binary-searchable table"),
clEnumValN(GenGlobalISel, "gen-global-isel",
"Generate GlobalISel selector"),
clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables",
"Generate X86 EVEX to VEX compress tables"),
clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
"Generate X86 fold tables"),
clEnumValN(GenRegisterBank, "gen-register-bank",
"Generate registers bank descriptions")));
88 clEnumValEnd)); <-- v7.0删除
这里我们不深入其中的细节(否则要花不少时间),只要知道,如果命令行上出现了其中一个选项,比如-gen-intrinsic,Action的值就会被设置为对应的枚举值,即GenIntrinsic。95行的switch语句根据这个枚举值调用对应的处理方法。注意,80~83行的print-enums与print-sets是用于调试、测试的选项。
3.2. TD文件的解析结果
前面说过dag,bit,string,int与list这些类型的对象只允许出现在class,multiclass,def,defm及foreach等声明里。因此,一个TD文件由一系列class,multiclass,def,defm等构成。在解析过程中,LLVM对这些定义一一构建一个Record实例。而在class,multiclass,def,defm定义中出现的各个域,则构建为对应的Init派生对象,保存在相应的Record实例中。
Record实例的匹配,以及在Record实例中查找指定的域是通过名字匹配来完成的。
所有Record实例都记录在TableGenMain的局部变量Records中,其方法getAllDerivedDefinitions (ClassName)返回所有从ClassName派生的def或defm对象。
下面我们将选取对代码生成、指令选择最为紧要的TableGen生成代码,研究其产生的过程及结果。