llc源码解析

llc指令用于将LLVM源输入编译成特定架构的汇编语言,然后,汇编语言输出可以通过本机汇编器和链接器来生成本机可执行文件。输出汇编代码的体系结构选择是从输入文件自动确定的,除非使用该-march选项覆盖默认值。

-O=uint     //指定clang优化级别生成代码 -O0,-O1,-O2,-O3
-march=           //指定架构生产汇编
-filetype=        //指定输出类型,.s汇编或.o目标文件

具体使用及配置可以查看官网查看。

以下是对其源码的解析:

int main(int argc, char **argv) {
//初始化,作用:1。设置一个信号处理程序,以便在进程崩溃时追踪堆栈,2.创建全局的handler用于当内存分配失败时调用它 3.在Windows平台可通过命令行获取utf-8编码参数
  InitLLVM X(argc, argv);

  // Enable debug stream buffering.
  EnableDebugBuffering = true;
    // 上下文,拥有并管理了LLVM core内部的全局数据,包括类型和常量表。但LLVMContext本身不提供锁保证,所以应该注意每个线程对应一个上下文
  LLVMContext Context;

  // Initialize targets first, so that --version shows registered targets.
  // 初始化目标机器,初始化对应机器码,以及支持asm打印 具体类型可查看Targets.def 
  InitializeAllTargets();
  InitializeAllTargetMCs();
  InitializeAllAsmPrinters();
  InitializeAllAsmParsers();

  // PassRegistry用于注册和初始化pass子系统,并帮助PassManager解析pass依赖关系。 初始化 codegen 以及 IR passes 以便 -print-after,
  // -print-before, and -stop-after 选项正常工作.
  PassRegistry *Registry = PassRegistry::getPassRegistry();
  initializeCore(*Registry);
  initializeCodeGen(*Registry);
  initializeLoopStrengthReducePass(*Registry);
  initializeLowerIntrinsicsPass(*Registry);
  initializeEntryExitInstrumenterPass(*Registry);
  initializePostInlineEntryExitInstrumenterPass(*Registry);
  initializeUnreachableBlockElimLegacyPassPass(*Registry);
  initializeConstantHoistingLegacyPassPass(*Registry);
  initializeScalarOpts(*Registry);
  initializeVectorization(*Registry);
  initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
  initializeExpandReductionsPass(*Registry);
  initializeHardwareLoopsPass(*Registry);
  initializeTransformUtils(*Registry);

  // Initialize debugging passes.
  initializeScavengerTestPass(*Registry);

  // Register the target printer for --version.
  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);

  cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
    // 设置除GlobalValue之外的Value是否保留name
  Context.setDiscardValueNames(DiscardValueNames);

  // Set a diagnostic handler that doesn't exit on the first error
  bool HasError = false;
  Context.setDiagnosticHandler(
      std::make_unique(&HasError));
  Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError);

  Expected> RemarksFileOrErr =
      setupLLVMOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
                                   RemarksFormat, RemarksWithHotness,
                                   RemarksHotnessThreshold);
  if (Error E = RemarksFileOrErr.takeError()) {
    WithColor::error(errs(), argv[0]) << toString(std::move(E)) << '\n';
    return 1;
  }
  std::unique_ptr RemarksFile = std::move(*RemarksFileOrErr);
    // 判断input是否正确
  if (InputLanguage != "" && InputLanguage != "ir" &&
      InputLanguage != "mir") {
    WithColor::error(errs(), argv[0])
        << "input language must be '', 'IR' or 'MIR'\n";
    return 1;
  }

  // 重复编译N次来给出更好的时间指标
  for (unsigned I = TimeCompilations; I; --I)
    // 编译子模块, 具体实现在下一段
    if (int RetVal = compileModule(argv, Context))
      return RetVal;

  if (RemarksFile)
    // 指示此输出文件的工具作业已成功,并且不应删除该文件
    RemarksFile->keep();
  return 0;
}
static int compileModule(char **argv, LLVMContext &Context) {
  // Load the module to be compiled...
    // 异常诊断
  SMDiagnostic Err;
  std::unique_ptr M;
    // 用于从MIR文件加载状态来初始化机器函数
  std::unique_ptr MIR;
    // 用于处理autoconf配置
  Triple TheTriple;
  std::string CPUStr = codegen::getCPUStr(),
              FeaturesStr = codegen::getFeaturesStr();

    // 通过命令行参数设置从MIR加载函数的属性
  auto setMIRFunctionAttributes = [&CPUStr, &FeaturesStr](Function &F) {
    codegen::setFunctionAttributes(CPUStr, FeaturesStr, F);
  };

  auto MAttrs = codegen::getMAttrs();
  bool SkipModule = codegen::getMCPU() == "help" ||
                    (!MAttrs.empty() && MAttrs.front() == "help");
    // 设置优化级别
  CodeGenOpt::Level OLvl = CodeGenOpt::Default;
  switch (OptLevel) {
  default:
    WithColor::error(errs(), argv[0]) << "invalid optimization level.\n";
    return 1;
  case ' ': break;
  case '0': OLvl = CodeGenOpt::None; break;
  case '1': OLvl = CodeGenOpt::Less; break;
  case '2': OLvl = CodeGenOpt::Default; break;
  case '3': OLvl = CodeGenOpt::Aggressive; break;
  }
    
    // 设置编译目标参数
  TargetOptions Options;
  auto InitializeOptions = [&](const Triple &TheTriple) {
    Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
    Options.DisableIntegratedAS = NoIntegratedAssembler;
    Options.MCOptions.ShowMCEncoding = ShowMCEncoding;
    Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory;
    Options.MCOptions.AsmVerbose = AsmVerbose;
    Options.MCOptions.PreserveAsmComments = PreserveComments;
    Options.MCOptions.IASSearchPaths = IncludeDirs;
    Options.MCOptions.SplitDwarfFile = SplitDwarfFile;
  };

  Optional RM = codegen::getExplicitRelocModel();

  const Target *TheTarget = nullptr;
  std::unique_ptr Target;

    // 如果用户只是想列出可用的选项,跳过模块加载
  if (!SkipModule) {
    auto SetDataLayout =
        [&](StringRef DataLayoutTargetTriple) -> Optional {
      // If we are supposed to override the target triple, do so now.
      std::string IRTargetTriple = DataLayoutTargetTriple.str();
      if (!TargetTriple.empty())
        IRTargetTriple = Triple::normalize(TargetTriple);
      TheTriple = Triple(IRTargetTriple);
      if (TheTriple.getTriple().empty())
        TheTriple.setTriple(sys::getDefaultTargetTriple());

            // 根据名称查找到要编译的指定架构,因为可能存在后端没有映射到目标triple的情况
      std::string Error;
      TheTarget =
          TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error);
      if (!TheTarget) {
        WithColor::error(errs(), argv[0]) << Error;
        exit(1);
      }

      // On AIX, setting the relocation model to anything other than PIC is
      // considered a user error.
      if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_) {
        WithColor::error(errs(), argv[0])
            << "invalid relocation model, AIX only supports PIC.\n";
        exit(1);
      }

            // 根据配置创建对应目标机器
      InitializeOptions(TheTriple);
      Target = std::unique_ptr(TheTarget->createTargetMachine(
          TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
          codegen::getExplicitCodeModel(), OLvl));
      assert(Target && "Could not allocate target machine!");

      return Target->createDataLayout().getStringRepresentation();
    };
      
      // 根据输入的文件格式解析生成Module
    if (InputLanguage == "mir" ||
        (InputLanguage == "" && StringRef(InputFilename).endswith(".mir"))) {
      MIR = createMIRParserFromFile(InputFilename, Err, Context,
                                    setMIRFunctionAttributes);
      if (MIR)
        M = MIR->parseIRModule(SetDataLayout);
    } else {
      M = parseIRFile(InputFilename, Err, Context, SetDataLayout);
    }
    if (!M) {
      Err.print(argv[0], WithColor::error(errs(), argv[0]));
      return 1;
    }
    if (!TargetTriple.empty())
      M->setTargetTriple(Triple::normalize(TargetTriple));
  } else {
    // 加载help选项case
    TheTriple = Triple(Triple::normalize(TargetTriple));
    if (TheTriple.getTriple().empty())
      TheTriple.setTriple(sys::getDefaultTargetTriple());

      // 根据名称查找到要编译的指定架构
    std::string Error;
    TheTarget =
        TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error);
    if (!TheTarget) {
      WithColor::error(errs(), argv[0]) << Error;
      return 1;
    }

    // On AIX, setting the relocation model to anything other than PIC is
    // considered a user error. (AIX是IBM专有的UNIX操作系统)
    if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_) {
      WithColor::error(errs(), argv[0])
          << "invalid relocation model, AIX only supports PIC.\n";
      return 1;
    }

      // 根据配置创建对应目标机器
    InitializeOptions(TheTriple);
    Target = std::unique_ptr(TheTarget->createTargetMachine(
        TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
        codegen::getExplicitCodeModel(), OLvl));
    assert(Target && "Could not allocate target machine!");

    // If we don't have a module then just exit now. We do this down
    // here since the CPU/Feature help is underneath the target machine
    // creation.
    return 0;
  }

  assert(M && "Should have exited if we didn't have a module!");
  if (codegen::getFloatABIForCalls() != FloatABI::Default)
    Options.FloatABIType = codegen::getFloatABIForCalls();

  // Figure out where we are going to send the output.
  std::unique_ptr Out =
      GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0]);
  if (!Out) return 1;

    // 是否需要将dwarf文件单纯拆出到文件流
  std::unique_ptr DwoOut;
  if (!SplitDwarfOutputFile.empty()) {
    std::error_code EC;
    DwoOut = std::make_unique(SplitDwarfOutputFile, EC,
                                               sys::fs::OF_None);
    if (EC) {
      WithColor::error(errs(), argv[0]) << EC.message() << '\n';
      return 1;
    }
  }

    ///*核心*
  // Build up all of the passes that we want to do to the module.
  legacy::PassManager PM;

  // Add an appropriate TargetLibraryInfo pass for the module's triple.
  TargetLibraryInfoImpl TLII(Triple(M->getTargetTriple()));

  // The -disable-simplify-libcalls flag actually disables all builtin optzns.
  if (DisableSimplifyLibCalls)
    TLII.disableAllFunctions();
  PM.add(new TargetLibraryInfoWrapperPass(TLII));

    // 立即验证模块,以便在任何pass调用doInitialization()之前捕获异常
  if (!NoVerify && verifyModule(*M, &errs())) {
    std::string Prefix =
        (Twine(argv[0]) + Twine(": ") + Twine(InputFilename)).str();
    WithColor::error(errs(), Prefix) << "input module is broken!\n";
    return 1;
  }

  // Override function attributes based on CPUStr, FeaturesStr, and command line
  // flags.
  codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M);

    // 文件类型不是obj但是配置了-mc-relax-all时给出一个警告
  if (mc::getExplicitRelaxAll() && codegen::getFileType() != CGFT_ObjectFile)
    WithColor::warning(errs(), argv[0])
        << ": warning: ignoring -mc-relax-all because filetype != obj";

  {
    raw_pwrite_stream *OS = &Out->os();

    // Manually do the buffering rather than using buffer_ostream,
    // so we can memcmp the contents in CompileTwice mode
    SmallVector Buffer;
    std::unique_ptr BOS;
    if ((codegen::getFileType() != CGFT_AssemblyFile &&
         !Out->os().supportsSeeking()) ||
        CompileTwice) {
      BOS = std::make_unique(Buffer);
      OS = BOS.get();
    }

    const char *argv0 = argv[0];
      // LLVMTargetMachine继承自TargetMachine,然后继承出每种详细架构的目标机器,用于实现目标独立的代码生成器
    LLVMTargetMachine &LLVMTM = static_cast(*Target);
    MachineModuleInfoWrapperPass *MMIWP =
        new MachineModuleInfoWrapperPass(&LLVMTM);

      //如果包含了自定义的pass,则构建自定义pass通道,在选择指令之后开始
    if (!RunPassNames->empty()) {
      if (!MIR) {
        WithColor::warning(errs(), argv[0])
            << "run-pass is for .mir file only.\n";
        return 1;
      }
        // Target-Independent的配置,用于将options配置到CodeGen的其他pass中
      TargetPassConfig &TPC = *LLVMTM.createPassConfig(PM);
      if (TPC.hasLimitedCodeGenPipeline()) {
        WithColor::warning(errs(), argv[0])
            << "run-pass cannot be used with "
            << TPC.getLimitedCodeGenPipelineReason(" and ") << ".\n";
        return 1;
      }
        
      TPC.setDisableVerify(NoVerify);
      PM.add(&TPC);
      PM.add(MMIWP);
        //dump并验证机器函数
      TPC.printAndVerify("");
      for (const std::string &RunPassName : *RunPassNames) {
        if (addPass(PM, argv0, RunPassName, TPC))
          return 1;
      }
        // 标示所有pass配置完成
      TPC.setInitialized();
        // 将IR按MIR格式序列打印输出
      PM.add(createPrintMIRPass(*OS));
        // 释放MachineFunction占用的内存
      PM.add(createFreeMachineFunctionPass());
        // 添加pass到指定的pass管理器,以生成指定的文件
    } else if (Target->addPassesToEmitFile(
                   PM, *OS, DwoOut ? &DwoOut->os() : nullptr,
                   codegen::getFileType(), NoVerify, MMIWP)) {
      WithColor::warning(errs(), argv[0])
          << "target does not support generation of this"
          << " file type!\n";
      return 1;
    }

    const_cast(LLVMTM.getObjFileLowering())
        ->Initialize(MMIWP->getMMI().getContext(), *Target);
    if (MIR) {
      assert(MMIWP && "Forgot to create MMIWP?");
      if (MIR->parseMachineFunctions(*M, MMIWP->getMMI()))
        return 1;
    }

    // Before executing passes, print the final values of the LLVM options.
    cl::PrintOptionValues();

//     If requested, run the pass manager over the same module again,
//     to catch any bugs due to persistent state in the passes. Note that
//     opt has the same functionality, so it may be worth abstracting this out
//     in the future.
    SmallVector CompileTwiceBuffer;
    if (CompileTwice) {
      std::unique_ptr M2(llvm::CloneModule(*M));
      PM.run(*M2);
      CompileTwiceBuffer = Buffer;
      Buffer.clear();
    }

    PM.run(*M);

    auto HasError =
        ((const LLCDiagnosticHandler *)(Context.getDiagHandlerPtr()))->HasError;
    if (*HasError)
      return 1;

    // Compare the two outputs and make sure they're the same
    if (CompileTwice) {
      if (Buffer.size() != CompileTwiceBuffer.size() ||
          (memcmp(Buffer.data(), CompileTwiceBuffer.data(), Buffer.size()) !=
           0)) {
        errs()
            << "Running the pass manager twice changed the output.\n"
               "Writing the result of the second run to the specified output\n"
               "To generate the one-run comparison binary, just run without\n"
               "the compile-twice option\n";
        Out->os() << Buffer;
        Out->keep();
        return 1;
      }
    }

    if (BOS) {
      Out->os() << Buffer;
    }
  }

  // Declare success.
  Out->keep();
  if (DwoOut)
    DwoOut->keep();

  return 0;
}

你可能感兴趣的:(llc源码解析)