tick_plot__compile.ipynb
时长边界_时上链异数: 长短函数调用链列表
0. 用matplotlib找系统中字体文件大于1MB的
中文字体通常很大,这样过滤出的 通常有中文字体
结果中 看名字 ‘AR PL UMing CN’ 果然是中文字体
from matplotlib.font_manager import fontManager
import os
# Collect the name of every font known to matplotlib whose font file still
# exists on disk and is larger than 1 MB (1e6 bytes).
fonts = []
for entry in fontManager.ttflist:
    font_path = entry.fname
    if os.path.exists(font_path) and os.stat(font_path).st_size > 1e6:
        fonts.append(entry.name)

# Print only the names that contain "CN".
for font in fonts:
    if 'CN' not in font:
        continue
    print(font)
AR PL UMing CN
AR PL UKai CN
!pip install scikit-learn
1. 产生tick日志
编译最小main加函数1语法错误 : 文件 mini_main_f1_err.c
/* Returns the literal "child" when `high` is set and `age` > 5; otherwise
 * returns the local buffer `name`. The `nick` parameter is not used.
 * NOTE(review): the surrounding notebook text describes this file as a
 * deliberately erroneous compile test, so the code is left as-is. */
char* calc_name(float age, bool high, char* nick){
if(high && age>5){
return "child";
}
char name[32]={"bigPeople"};
/* NOTE(review): `name` is a local array, so the pointer returned here refers
 * to storage that is no longer valid after the function returns. */
return name;
}
/* Entry point: calls calc_name once with fixed arguments, discards the
 * result, and returns 0. argc/argv are not used. */
int main(int argc, char** argv){
calc_name(10,false,"nick");
return 0;
}
tick_save=true /pubx/build-llvm15/bin/clang-15 -c mini_main_printf.c
ls -lh mini_main_printf.o
tick_save=true /pubx/build-llvm15/bin/clang-15 mini_main_printf.c -o mmp
"""
pure virtual method called
terminate called without an active exception
clang-15: error: unable to execute command: Aborted (core dumped)
clang-15: error: clang frontend command failed due to signal (use -v to see invocation)
clang version 15.0.0 (git@gitcode.net:pubz/llvm-project.git 3387b19bb538e694d2d965d46c7b053d61a059e3)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /pubx/build-llvm15/bin
clang-15: note: diagnostic msg:
********************
PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:
Preprocessed source(s) and associated run script(s) are located at:
clang-15: note: diagnostic msg: /tmp/mini_main_printf-f6d921.c
clang-15: note: diagnostic msg: /tmp/mini_main_printf-f6d921.sh
clang-15: note: diagnostic msg:
********************
malloc(): unsorted double linked list corrupted
malloc(): unsorted double linked list corrupted
malloc(): unsorted double linked list corrupted
malloc(): unsorted double linked list corrupted
malloc(): unsorted double linked list corrupted
...
段错误 (核心已转储)
"""
崩溃初步分析
崩溃初步分析
gdb查看哪崩溃的,可以看到很奇怪,main已经完成后崩溃了,日志正常输出了,暂时不管了
gdb --args /pubx/build-llvm15/bin/clang-15 -c mini_main_printf.c
(gdb) set environment tick_save=true
(gdb) run
Starting program: /build/pubx/build-llvm15/bin/clang-15 -c mini_main_printf.c
[Thread debugging using libthread_db enabled]
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff78a17c3 in unlink_chunk (p=p@entry=0x55555cbfc0b0, av=0x7ffff7a19c80 <main_arena>) at ./malloc/malloc.c:1634
1634 ./malloc/malloc.c: 没有那个文件或目录.
(gdb) bt
at ./stdlib/exit.c:113
, argc=argc@entry=3, argv=argv@entry=0x7fffffffdc38) at ../sysdeps/nptl/libc_start_call_main.h:74
, argc=3, argv=0x7fffffffdc38, init=, fini=, rtld_fini=,
stack_end=0x7fffffffdc28) at ../csu/libc-start.c:392
1. 前置
%config InlineBackend.rc={'figure.figsize': (30,20)}
sklearn 各种归一化
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MaxAbsScaler
# One default-configured instance of each scikit-learn scaler imported above.
minMaxScaler = MinMaxScaler()
standardScaler = StandardScaler()
maxAbsScaler = MaxAbsScaler()
导包
import numpy
import math
import seaborn
import plotly.offline as plotly_offline
plotly_offline.init_notebook_mode(connected=True)
import plotly.graph_objs as graph_objs
import plotly.figure_factory as figure_factory
from plotly.graph_objs import Scatter, Figure, Scatter3d
解决 matplotlib 中文不正常显示问题,中文显示为方块,原因是没有中文字体,这里 将字体设置为 上面找到的中文字体 ‘AR PL UMing CN’
import matplotlib.pyplot
# Point matplotlib at the Chinese-capable font 'AR PL UMing CN' so that
# Chinese text is not rendered as empty boxes.
matplotlib.pyplot.rcParams['font.family'] = 'AR PL UMing CN'
matplotlib.pyplot.rcParams['font.sans-serif'] = [ 'AR PL UMing CN']
pandas显示最大行数
import pandas
1b 作图前置
def inrease_g_size(g):
    """Make the legend markers of a plot fully opaque and large.

    Every legend handle gets alpha 1 and a marker size list of ``[500]``.

    Args:
        g: An object exposing a ``legend_`` attribute (for example a
            matplotlib Axes). If ``legend_`` is ``None`` nothing is done.

    Returns:
        None. The legend handles are modified in place.
    """
    legend = g.legend_
    if legend is None:
        # No legend attached: nothing to enlarge.
        return

    # matplotlib 3.7 renamed ``Legend.legendHandles`` to ``legend_handles``
    # and later removed the old name; support both spellings.
    handles = getattr(legend, "legend_handles", None)
    if handles is None:
        handles = legend.legendHandles

    for lh in handles:
        lh.set_alpha(1)
        lh._sizes = [500]
import warnings
# Suppress every RuntimeWarning from this point on.
warnings.filterwarnings("ignore", category=RuntimeWarning)
2. 加载tick日志
%%bash
ls -lhrt /tick_data_home/ | tail -n 1
# Load one tick log as CSV: comma-separated fields, single quote as the quote
# character. The path is hard-coded to a specific log file.
df=pandas.read_csv(filepath_or_buffer="/tick_data_home/clang-15_13324_1691799886944_1",sep=',' , quotechar="'")
# Print the column names, dtypes and memory usage of the loaded frame.
df.info()
RangeIndex: 3236685 entries, 0 to 3236684
Data columns (total 22 columns):
# Column Dtype
--- ------ -----
0 滴答 int64
1 funcLocalClock int64
2 tickKind int64
3 funcEnterId int64
4 hasFuncCallChain int64
5 funcEnterIdSeqLen int64
6 funcEnterIdSeq object
7 rTSVarC int64
8 d栈生 int64
9 d栈死 int64
10 d堆生 int64
11 d堆死 int64
12 栈生 int64
13 栈死 int64
14 栈净 int64
15 堆生 int64
16 堆死 int64
17 堆净 int64
18 srcFile object
19 funcLine int64
20 funcCol int64
21 funcName object
dtypes: int64(19), object(3)
memory usage: 543.3+ MB
tickKind定义
文件: /pubx/clang-ctk/t_clock_tick/t_clock_tick.cpp
```cpp
enum TickKind{
NormalTick=0,
FuncEnter=1,
FuncReturn=2
};
```
# Python-side copies of the C++ TickKind enum values.
# Ordinary tick
NormalTick=0
# Function entry
FuncEnter=1
# Function return tick
FuncReturn=2
这里不关注 一般tick,因此删除一般tick,可以大大提高本脚本运行速度
# Ordinary ticks are not analysed here; removing them shrinks the frame and
# speeds up the rest of the notebook. Shape is printed before and after.
print(df.shape)
is_normal_tick = df['tickKind'] == NormalTick
normal_tick_labels = df.loc[is_normal_tick].index
df.drop(normal_tick_labels, inplace=True)
print(df.shape)
(3236685, 22)
(666718, 22)
由于tick.cpp中 funcEnterIdSeq 构造很脏,需要清洗
funcEnterIdSeq 只有函数进入 才有正常字符串,其他(比如函数出、滴答)会出现NAN,因此要将NAN替换为空字符串
# Rows without a call-chain string hold NaN in funcEnterIdSeq; replace NaN
# with the empty string so later string operations work on every row.
# The result is assigned back to the column: a chained
# `df[col].fillna(..., inplace=True)` acts on an intermediate object, which
# triggers a FutureWarning on pandas 2.x and leaves `df` unchanged under
# Copy-on-Write.
df['funcEnterIdSeq'] = df['funcEnterIdSeq'].fillna('')
‘’ funcEnterIdSeq ‘’ 明显多了一对单引号,去掉多余的一对单引号
-2100558033#2#1# 左侧第一个负数,是因为拿了不是自己内存区域导致的,需要去掉。
def _strip_seq_prefix(seqK):
    """Drop everything up to and including the first '#', then remove all single quotes.

    A string without '#' is kept whole (only the quotes are removed).
    """
    _head, sep, tail = seqK.partition("#")
    remainder = tail if sep else seqK
    return remainder.replace("'", "")


# Clean every funcEnterIdSeq value: the leading garbage number before the
# first '#' and the surplus single quotes are removed.
df['funcEnterIdSeq'] = df['funcEnterIdSeq'].apply(_strip_seq_prefix)
3. funcId 构造
df.head(1)

3.1 新增列 funcLoc 相当于字符串样式的funcId
# Build funcLoc as "<srcFile>_<funcLine>_<funcCol>".
# Vectorised string concatenation yields the same strings as formatting each
# row with an f-string, without a Python-level call per row
# (df.apply(axis=1) is very slow on hundreds of thousands of rows).
df['funcLoc'] = (
    df['srcFile'].astype(str)
    + '_' + df['funcLine'].astype(str)
    + '_' + df['funcCol'].astype(str)
)
# Show the first four values as a quick sanity check.
df['funcLoc'].values[:4]
array(['/pubx/llvm-project/llvm/lib/Support/CommandLine.cpp_42_42',
'/pubx/llvm-project/llvm/lib/Support/CommandLine.cpp_41_41',
'/pubx/llvm-project/llvm/lib/Support/ManagedStatic.cpp_77_77',
'/pubx/llvm-project/llvm/lib/Support/Threading.cpp_36_36'],
dtype=object)
3.2 以 区间[0, funcLoc不重复个数-1] 作为 funcId表
# All funcLoc values, one per row (with duplicates).
funcLoc_values = df['funcLoc'].values
type(funcLoc_values)
len(funcLoc_values)
funcLoc_list = list(funcLoc_values)
len(funcLoc_list)
# Distinct funcLoc values.
funcLoc_set = set(funcLoc_values)
len(funcLoc_set)
# Sort the distinct locations before numbering them: iterating a set of
# strings follows hash order, which differs between interpreter sessions, so
# unsorted ids would not be reproducible from one run to the next.
uqFuncLoc_Ls = sorted(funcLoc_set)
len(uqFuncLoc_Ls)
# funcLoc -> funcId, with ids in [0, number of distinct funcLoc - 1].
funcId_Tab = {fL: j for j, fL in enumerate(uqFuncLoc_Ls)}
list(funcId_Tab.items())[:5]
[('/pubx/llvm-project/llvm/lib/Support/MemoryBuffer.cpp_83_83', 0),
('/pubx/llvm-project/clang/lib/Driver/Driver.cpp_58_58', 1),
('/pubx/llvm-project/clang/lib/CodeGen/CodeGenTypes.cpp_69_69', 2),
('/pubx/llvm-project/llvm/lib/MC/MCFragment.cpp_64_64', 3),
('/pubx/llvm-project/llvm/lib/Support/VirtualFileSystem.cpp_57_57', 4)]
# Reverse lookup table: funcId -> funcLoc, numbered by position in uqFuncLoc_Ls.
funcId2Loc_Tab = dict(enumerate(uqFuncLoc_Ls))
funcId_count = len(funcId_Tab)
print("funcId个数:", funcId_count)
3.3 参照 funcLoc列、funcId表 新增列funcId
# Translate each row's funcLoc into its integer id via funcId_Tab
# (a funcLoc missing from the table raises KeyError).
df['funcId'] = [funcId_Tab[loc] for loc in df['funcLoc']]
# Inspect the result: first ten ids, first two rows, then the frame summary.
df['funcId'].values[:10]
df.head(2)
df.info()
Int64Index: 666718 entries, 0 to 3236683
Data columns (total 24 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 滴答 666718 non-null int64
1 funcLocalClock 666718 non-null int64
2 tickKind 666718 non-null int64
3 funcEnterId 666718 non-null int64
4 hasFuncCallChain 666718 non-null int64
5 funcEnterIdSeqLen 666718 non-null int64
6 funcEnterIdSeq 666718 non-null object
7 rTSVarC 666718 non-null int64
8 d栈生 666718 non-null int64
9 d栈死 666718 non-null int64
10 d堆生 666718 non-null int64
11 d堆死 666718 non-null int64
12 栈生 666718 non-null int64
13 栈死 666718 non-null int64
14 栈净 666718 non-null int64
15 堆生 666718 non-null int64
16 堆死 666718 non-null int64
17 堆净 666718 non-null int64
18 srcFile 666718 non-null object
19 funcLine 666718 non-null int64
20 funcCol 666718 non-null int64
21 funcName 666718 non-null object
22 funcLoc 666718 non-null object
23 funcId 666718 non-null int64
dtypes: int64(20), object(4)
memory usage: 127.2+ MB