phasar LLVM静态分析框架介绍




# 安装
$ git clone https://github.com/secure-software-engineering/phasar.git
$ cd phasar
$ sudo ./bootstrap.sh
# 测试:若能获取正常输出,则表示安装正确。
$ phasar-llvm --module test/build_systems_tests/installation_tests/module.ll -D IFDSSolverTest

架构:PhASAR调用LLVM API实现,可扩展性强,可以编写插件来增加功能。提供了命令行接口,也提供了封装的API来开发新工具。




要解决IFDS(Inter-procedural Finite Distributive Subset)问题,开发者需要定义一个类型,实现FlowFunctions接口。

// Forward declaration: the factory methods below only hand out pointers to
// FlowFunction<D>, so the complete type is not needed at this point.
template <typename D> struct FlowFunction;

/// Flow-function factory interface that an IFDS problem implements.
///
/// @tparam N  type of the program points passed in (curr, succ, callSite, ...)
/// @tparam D  type of a data-flow fact, as consumed by FlowFunction<D>
/// @tparam M  type of the methods/functions passed in (destMthd, calleeMthd, callees)
template <typename N, typename D, typename M> struct FlowFunctions {
    virtual ~FlowFunctions() = default;
    // Handles all intra-procedural flows, i.e. the edge from `curr` to its
    // successor `succ` inside one function.
    virtual FlowFunction<D> *getNormalFlowFunction(N curr, N succ) = 0;
    // Handles inter-procedural flows at a call site. Typically this factory
    // maps the data-flow facts that hold at the given call site into the
    // scope of the called method.
    virtual FlowFunction<D> *getCallFlowFunction(N callStmt, M destMthd) = 0;
    // Handles inter-procedural flows at function exit (e.g. a return). It maps
    // the callee's return value, and facts that may leave the function through
    // reference or pointer parameters, back into the caller's context/scope.
    virtual FlowFunction<D> *getRetFlowFunction(N callSite, M calleeMthd, N exitStmt, N retSite) = 0;
    // Propagates alongside the call site all data-flow facts that are not
    // involved in the call, typically stack-local data that is not referenced
    // by an argument.
    virtual FlowFunction<D> *getCallToRetFlowFunction(N callSite, N retSite, std::set<M> callees) = 0;
};



/// A single flow function over data-flow facts of type D.
///
/// @tparam D  type of a data-flow fact
template <typename D> struct FlowFunction {
    virtual ~FlowFunction() = default;
    // Given one incoming fact `source`, returns the set of facts it is
    // mapped to.
    virtual std::set<D> computeTargets(D source) = 0;
};










3.1 使用示例——Example uses

// Example program: main.cpp
// Minimal translation unit used to produce the LLVM IR shown below.
int main() {
    int i = 1;
    int j = 2;
    // k is never read; it is kept so the generated IR contains the
    // load/add/store sequence for i + j.
    int k = i + j;
    return 0;
}


$ clang++ -emit-llvm -S main.cpp


; ModuleID = 'main.cpp'
source_filename = "main.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: noinline norecurse nounwind optnone uwtable
define i32 @main() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  store i32 0, i32* %1, align 4
  store i32 1, i32* %2, align 4
  store i32 2, i32* %3, align 4
  %5 = load i32, i32* %2, align 4
  %6 = load i32, i32* %3, align 4
  %7 = add nsw i32 %5, %6
  store i32 %7, i32* %4, align 4
  ret i32 0
}

attributes #0 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.1 (tags/RELEASE_501/final 332326)"}


$ phasar-llvm -m path/to/your/main.ll -D IFDSSolverTest

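使用mem2reg pass来优化以上中间码:把可提升的栈变量(alloca)提升为SSA寄存器,目标是尽可能多使用寄存器、少使用内存,少使用alloca。注意:上面的IR是在-O0下生成的,函数带有optnone属性,opt会跳过这类函数;需要先用 clang++ -emit-llvm -S -Xclang -disable-O0-optnone main.cpp 重新生成不带optnone的IR,mem2reg才会生效(下面输出的函数属性中已不含optnone):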

$ opt -mem2reg -S main.ll
; ModuleID = 'main-mem2reg.ll'
source_filename = "main.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: noinline norecurse nounwind uwtable
define dso_local i32 @main() #0 {
  %1 = add nsw i32 1, 2
  ret i32 0
}

attributes #0 = { noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 8.0.0 "}

// Returns its argument incremented by one. It is the callee in the
// call-site example that follows.
int function(int x) {
	return x + 1;
}

// Entry point of the second example: calls function() once so that the
// generated IR contains a call instruction.
int main() {
	int i = 42;
	// j is never read; it only receives the call's return value.
	int j = function(i);
	return 0;
}


; ModuleID = 'main.cpp'
source_filename = "main.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: noinline nounwind optnone uwtable
define i32 @_Z8functioni(i32) #0 {
  %2 = alloca i32, align 4
  store i32 %0, i32* %2, align 4
  %3 = load i32, i32* %2, align 4
  %4 = add nsw i32 %3, 1
  ret i32 %4
}

; Function Attrs: noinline norecurse nounwind optnone uwtable
define i32 @main() #1 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  store i32 0, i32* %1, align 4
  store i32 42, i32* %2, align 4
  %4 = load i32, i32* %2, align 4
  %5 = call i32 @_Z8functioni(i32 %4)
  store i32 %5, i32* %3, align 4
  ret i32 0
}

attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.1 (tags/RELEASE_501/final 332326)"}

运行IFDSSolverTest分析,生成IFDS/IDE结果,和intra/inter monotone framework结果不同。

IFDS/IDE结果中,每个语句N处,都保存所有的data-flow facts D。

3.2 运行分析


# 使用-D选项
$ phasar-llvm -m module.ll -D IFDSSolverTest 
$ phasar-llvm -m module.ll -D IFDSUninitializedVariables


DataFlowAnalysisType Parameter
DataFlowAnalysisType::IFDSConstAnalysis “IFDSConstAnalysis”
DataFlowAnalysisType::IFDSLinearConstantAnalysis “IFDSLinearConstantAnalysis”
DataFlowAnalysisType::IFDSSolverTest “IFDSSolverTest”
DataFlowAnalysisType::IFDSTaintAnalysis “IFDSTaintAnalysis”
DataFlowAnalysisType::IFDSTypeAnalysis “IFDSTypeAnalysis”
DataFlowAnalysisType::IFDSUninitializedVariables “IFDSUninitializedVariables”
DataFlowAnalysisType::IDELinearConstantAnalysis “IDELinearConstantAnalysis”
DataFlowAnalysisType::IDESolverTest “IDESolverTest”
DataFlowAnalysisType::IDETaintAnalysis “IDETaintAnalysis”
DataFlowAnalysisType::IDETypeStateAnalysis “IDETypeStateAnalysis”
DataFlowAnalysisType::IntraMonoFullConstantPropagation “IntraMonoFullConstantPropagation”
DataFlowAnalysisType::IntraMonoSolverTest “IntraMonoSolverTest”
DataFlowAnalysisType::InterMonoSolverTest “InterMonoSolverTest”
DataFlowAnalysisType::InterMonoTaintAnalysis “InterMonoTaintAnalysis”
DataFlowAnalysisType::Plugin “Plugin”
DataFlowAnalysisType::None “None”
$ phasar-llvm --help 
PhASAR v0120
A LLVM-based static analysis framework

Allowed options:

Command-line options:
  -v [ --version ]                      Print PhASAR version
  -h [ --help ]                         Print help message
  --more-help                           Print more help
  -c [ --config ] arg                   Path to the configuration file, options
                                        can be specified as 'parameter = 
                                        option'
  -s [ --silent ]                       Suppress any non-result output

Configuration file options:
  -m [ --module ] arg                   Path to the module(s) under analysis
  -E [ --entry-points ] arg             Set the entry point(s) to be used
  -O [ --output ] arg (=results.json)   Filename for the results
  -D [ --data-flow-analysis ] arg       Set the analysis to be run
  --analysis-strategy arg (=WPA)
  --analysis-config arg                 Set the analysis's configuration (if 
                                        required)
  -P [ --pointer-analysis ] arg (=CFLAnders)
                                        Set the points-to analysis to be used 
                                        (CFLSteens, CFLAnders)
  -C [ --call-graph-analysis ] arg (=OTF)
                                        Set the call-graph algorithm to be used
                                        (NORESOLVE, CHA, RTA, DTA, VTA, OTF)
  -H [ --classhierarchy-analysis ]      Class-hierarchy analysis
  -S [ --statistical-analysis ]         Statistics
  -M [ --mwa ]                          Enable Modulewise-program analysis mode
  -R [ --printedgerec ]                 Print exploded-super-graph edge 
                                        recorder
  -L [ --log ]                          Enable logging
  --emit-ir                             Emit preprocessed and annotated IR of 
                                        analysis target
  --emit-raw-results                    Emit unprocessed/raw solver results
  --emit-text-report                    Emit textual report of solver results
  --emit-graphical-report               Emit graphical report of solver results
  --emit-esg-as-dot                     Emit the Exploded super-graph (ESG) as 
                                        DOT graph
  --emit-th-as-text                     Emit the type hierarchy as text
  --emit-th-as-dot                      Emit the type hierarchy as DOT graph
  --emit-cg-as-text                     Emit the call graph as text
  --emit-cg-as-dot                      Emit the call graph as DOT graph
  --emit-pta-as-text                    Emit the points-to information as text
  --emit-pta-as-dot                     Emit the points-to information as DOT 
                                        graph
  --right-to-ludicrous-speed            Uses ludicrous speed (shared memory 
                                        parallelism) whenever possible
  --analysis-plugin arg                 Analysis plugin(s) (absolute path to 
                                        the shared object file(s))
  --callgraph-plugin arg                ICFG plugin (absolute path to the 
                                        shared object file)
  -I [ --project-id ] arg (=default-phasar-project)
                                        Project Id used for the database
  -A [ --pamm-out ] arg (=PAMM_data.json)
                                        Filename for PAMM's gathered data
$ clang++ -emit-llvm -S main.cpp
$ phasar-llvm -m path/to/your/main.ll -D IFDSSolverTest


