一, 大致框架
二, 零散的代码学习
data-get(N, C, T, V, M)(已经包含时间和空间信息)(样本数,channel,时间帧数,num_node,人数)。
joint_data;- graph_spatial(A)- (agcn.py)model(A,B,C;AGCN) -(main.py)train - test- softmax score a
bone_data;- graph_spatial(A)- (agcn.py)model(A,B,C;AGCN) -(main.py)train - test- softmax score b。
(ensemble.py)a+b -> fused score , action label。
① 针对graph文件夹,就是为了return A,即得到邻接矩阵。论文中使用的N*N表示Ak,即代码中的V(num_node)
class Graph:
    """Skeleton graph whose purpose is to expose the adjacency tensor ``A``."""

    def __init__(self, labeling_mode='spatial'):
        # The adjacency tensor is built once, when the graph is constructed.
        self.A = self.get_adjacency_matrix(labeling_mode)
        ...  # placeholder carried over from the original excerpt

    def get_adjacency_matrix(self, labeling_mode=None):
        """Return the adjacency tensor for ``labeling_mode``.

        With ``labeling_mode=None`` the tensor already stored on the instance
        is returned. ``'spatial'`` builds a new one via
        ``tools.get_spatial_graph``. Any other value raises ``ValueError``.
        """
        if labeling_mode is None:
            return self.A
        if labeling_mode != 'spatial':
            raise ValueError()
        return tools.get_spatial_graph(num_node, self_link, inward, outward)
tools.py
def get_spatial_graph(num_node, self_link, inward, outward):
    """Stack the self-link, inward and outward adjacency matrices.

    ``self_link``, ``inward`` and ``outward`` are lists of ``(a, b)`` index
    pairs. The inward and outward matrices are column-normalised; the
    self-link matrix is used as is. The result has shape
    ``(3, num_node, num_node)``.
    """
    identity = edge2mat(self_link, num_node)
    inward_norm = normalize_digraph(edge2mat(inward, num_node))
    outward_norm = normalize_digraph(edge2mat(outward, num_node))
    return np.stack((identity, inward_norm, outward_norm))
def edge2mat(link, num_node):
    """Turn a list of ``(i, j)`` pairs into a dense adjacency matrix.

    Returns a ``(num_node, num_node)`` float array (the paper's N x N matrix,
    where N is ``V`` / ``num_node`` in the code) with ``A[j, i] = 1`` for
    every pair ``(i, j)`` in ``link``.
    """
    adjacency = np.zeros((num_node, num_node))
    for src, dst in link:
        # Note the transposed write: the pair (src, dst) sets row dst, column src.
        adjacency[dst, src] = 1
    return adjacency
def normalize_digraph(A):
    """Normalise ``A`` by dividing every column by its column sum.

    Columns whose sum is not positive are left multiplied by zero, so they
    come out as all-zero columns. ``A`` must be 2-D; the result has the same
    shape as ``A``.
    """
    col_sums = np.sum(A, 0)  # one total per column
    _, width = A.shape
    inv_degree = np.zeros((width, width))
    for col in range(width):
        total = col_sums[col]
        if total > 0:
            inv_degree[col, col] = total ** (-1)
    # (h x w) @ (w x w) -> (h x w): scales column j by 1 / col_sums[j].
    return np.dot(A, inv_degree)
The adjacency matrix A determines whether there is a connection between two vertices; it represents the physical structure of the human body.
ntu_rgb_d.py(kinetics.py,num_node = 18,inward本身从0开始)
# Number of joints in one skeleton.
num_node = 25
# Self-connections: every joint is linked to itself.
self_link = [(joint, joint) for joint in range(num_node)]
# 1-based joint index pairs listing which joints are connected.
inward_ori_index = [
    (1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5),
    (7, 6), (8, 7), (9, 21), (10, 9), (11, 10), (12, 11),
    (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), (18, 17),
    (19, 18), (20, 19), (22, 23), (23, 8), (24, 25), (25, 12),
]
# Shift to 0-based indices.
inward = [(a - 1, b - 1) for a, b in inward_ori_index]
# Same edges reversed, so that both directions are present.
outward = [pair[::-1] for pair in inward]
neighbor = inward + outward
② 针对model文件夹,分别对应论文中 adaptive graph convolutional network > 4.1layer(unit_gcn,unit_tcn)4.2block(TCN_GCN_unit);4.3network(Model)
#For the temporal dimension,it is straightforward to perform the graph convolution similar to the classical convolution operation.
class unit_tcn(nn.Module):
    """Temporal convolution unit: a (Kt x 1) convolution over the time axis, then BN.

    Expects input laid out as (N, C, T, V); the kernel spans ``kernel_size``
    frames and a single joint.
    """

    def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
        super().__init__()
        # "Same" padding on the time axis, so stride 1 keeps T unchanged for odd kernels.
        pad = int((kernel_size - 1) / 2)
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=(kernel_size, 1),
            padding=(pad, 0),
            stride=(stride, 1),
        )
        self.bn = nn.BatchNorm2d(out_channels)
        ...  # placeholder carried over from the original excerpt

    def forward(self, x):
        """Apply the temporal convolution and batch norm (no activation here)."""
        return self.bn(self.conv(x))
class unit_gcn(nn.Module):
    """Spatial graph convolution + BN + residual + ReLU, with optional attention.

    Args:
        in_channels: number of input feature channels.
        out_channels: number of output feature channels.
        A: numpy adjacency stack, indexed as ``A[i]`` per subset; its last
            dimension is taken as the joint count.
        coff_embedding: reduction factor for the embedding channels
            (``inter_channels = out_channels // coff_embedding``).
        num_subset: number of adjacency subsets; one conv branch per subset.
        adaptive: if true, the adjacency is a learnable parameter and a
            data-dependent graph is added to it; otherwise the fixed ``A``
            is used.
        attention: if true, apply spatial, temporal and channel attention.
    """

    def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3, adaptive=True, attention=True):
        super(unit_gcn, self).__init__()
        inter_channels = out_channels // coff_embedding
        # forward() and the loops below read these two attributes, so they
        # must be stored before first use.
        self.inter_c = inter_channels
        self.num_subset = num_subset
        num_jpts = A.shape[-1]

        # One 1x1 output convolution per adjacency subset.
        self.conv_d = nn.ModuleList()
        for i in range(self.num_subset):
            self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1))

        if adaptive:
            # Learnable adjacency, initialised from the given graph.
            self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32)))
            # Two 1x1 embedding convolutions per subset for the data-dependent graph.
            self.conv_a = nn.ModuleList()
            self.conv_b = nn.ModuleList()
            for i in range(self.num_subset):
                self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1))
                self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1))
        else:
            # Fixed adjacency: a plain tensor, which does not require grad.
            self.A = torch.from_numpy(A.astype(np.float32))
        self.adaptive = adaptive

        if attention:
            # temporal attention
            self.conv_ta = nn.Conv1d(out_channels, 1, 9, padding=4)
            nn.init.constant_(self.conv_ta.weight, 0)
            nn.init.constant_(self.conv_ta.bias, 0)

            # spatial attention
            # num_jpts % 2 is 0 (falsy) for an even joint count, so the
            # kernel size is always odd and the padding below keeps V unchanged.
            ker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts
            pad = (ker_jpt - 1) // 2
            self.conv_sa = nn.Conv1d(out_channels, 1, ker_jpt, padding=pad)
            nn.init.xavier_normal_(self.conv_sa.weight)
            nn.init.constant_(self.conv_sa.bias, 0)

            # channel attention
            rr = 2
            self.fc1c = nn.Linear(out_channels, out_channels // rr)
            self.fc2c = nn.Linear(out_channels // rr, out_channels)
            nn.init.kaiming_normal_(self.fc1c.weight)
            nn.init.constant_(self.fc1c.bias, 0)
            nn.init.constant_(self.fc2c.weight, 0)
            nn.init.constant_(self.fc2c.bias, 0)
        self.attention = attention

        # Residual projection, needed only when the channel count changes.
        if in_channels != out_channels:
            self.down = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.down = lambda x: x

        self.bn = nn.BatchNorm2d(out_channels)
        self.soft = nn.Softmax(-2)
        self.tan = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU(inplace=True)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                conv_init(m)
            elif isinstance(m, nn.BatchNorm2d):
                bn_init(m, 1)
        bn_init(self.bn, 1e-6)
        for i in range(self.num_subset):
            conv_branch_init(self.conv_d[i], self.num_subset)

    def forward(self, x):
        """Run the graph convolution on ``x`` of shape (N, C, T, V)."""
        N, C, T, V = x.size()

        y = None
        if self.adaptive:
            # The original read the local A before assigning it; start from
            # the learnable adjacency instead.
            A = self.PA
            for i in range(self.num_subset):
                # Two 1x1-conv embeddings of x, flattened to (N, V, C'T) and (N, C'T, V).
                A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T)
                A2 = self.conv_b[i](x).view(N, self.inter_c * T, V)
                # Joint-to-joint similarity scaled by the embedding size -> (N, V, V).
                A1 = self.soft(torch.matmul(A1, A2) / A1.size(-1))
                # Learned adjacency plus the data-dependent graph.
                A1 = A[i] + A1
                A2 = x.view(N, C * T, V)
                z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
                y = z + y if y is not None else z
        else:
            # `self.mask` is never defined in this class, so it is dropped.
            # `.to(x.device)` also works for CPU inputs, unlike `.cuda(...)`.
            A = self.A.to(x.device)
            for i in range(self.num_subset):
                A1 = A[i]
                A2 = x.view(N, C * T, V)
                z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
                y = z + y if y is not None else z

        y = self.bn(y)
        y += self.down(x)  # residual connection
        y = self.relu(y)

        if self.attention:
            # NOTE(review): the attention body was elided in the original
            # notes. It is reconstructed here from the layers declared in
            # __init__ - confirm against the upstream implementation.
            # spatial attention: pool over T -> (N, C, V), one weight per joint
            se = y.mean(-2)
            se1 = self.sigmoid(self.conv_sa(se))
            y = y * se1.unsqueeze(-2) + y
            # temporal attention: pool over V -> (N, C, T), one weight per frame
            se = y.mean(-1)
            se1 = self.sigmoid(self.conv_ta(se))
            y = y * se1.unsqueeze(-1) + y
            # channel attention: pool over T and V -> (N, C), one weight per channel
            se = y.mean(-1).mean(-1)
            se1 = self.relu(self.fc1c(se))
            se2 = self.sigmoid(self.fc2c(se1))
            y = y * se2.unsqueeze(-1).unsqueeze(-1) + y

        return y
class TCN_GCN_unit(nn.Module):
    """Adaptive graph convolutional block: spatial GCN, temporal conv, residual, ReLU."""

    def __init__(self, in_channels, out_channels, A, stride=1, residual=True, adaptive=True, attention=True):
        super().__init__()
        self.gcn1 = unit_gcn(in_channels, out_channels, A, adaptive=adaptive, attention=attention)
        # Temporal unit (convolution + batch norm); the ReLU is applied in forward().
        self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride)
        self.relu = nn.ReLU(inplace=True)
        self.attention = attention

        shape_preserved = (in_channels == out_channels) and (stride == 1)
        if not residual:
            # No shortcut: contributes nothing to the sum.
            shortcut = lambda x: 0
        elif shape_preserved:
            # Identity shortcut when the block keeps the tensor shape.
            shortcut = lambda x: x
        else:
            # Otherwise a single 1x1 temporal conv matches channels and stride.
            shortcut = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride)
        self.residual = shortcut

    def forward(self, x):
        """Return ``relu(tcn(gcn(x)) + residual(x))``."""
        main_path = self.tcn1(self.gcn1(x))
        return self.relu(main_path + self.residual(x))
class Model(nn.Module):
    """Adaptive graph convolutional network: ten TCN_GCN blocks and a linear classifier.

    Args:
        num_class: number of output classes.
        num_point: number of joints per skeleton (V).
        num_person: number of bodies per sample (M).
        graph: dotted path of the Graph class to import; required.
        graph_args: keyword arguments forwarded to the Graph constructor.
        in_channels: number of input channels per joint (C).
        drop_out: dropout probability before the classifier; 0 disables it.
        adaptive: forwarded to every block.
        attention: forwarded to every block.

    Raises:
        ValueError: if ``graph`` is not given.
    """

    def __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=None, in_channels=3, drop_out=0, adaptive=True, attention=True):
        super(Model, self).__init__()

        if graph is None:
            # Fail with a clear message instead of an error deep inside import_class.
            raise ValueError('graph must be the dotted path of a Graph class')
        Graph = import_class(graph)
        # `None` replaces the shared mutable default; an empty dict is used instead.
        self.graph = Graph(**(graph_args or {}))

        A = self.graph.A
        self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)

        # The first block takes `in_channels`, to agree with data_bn above.
        self.l1 = TCN_GCN_unit(in_channels, 64, A, residual=False, adaptive=adaptive, attention=attention)
        self.l2 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l3 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l4 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l5 = TCN_GCN_unit(64, 128, A, stride=2, adaptive=adaptive, attention=attention)
        self.l6 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)
        self.l7 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)
        self.l8 = TCN_GCN_unit(128, 256, A, stride=2, adaptive=adaptive, attention=attention)
        self.l9 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)
        self.l10 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)

        self.fc = nn.Linear(256, num_class)

        # forward() calls self.drop_out, so it must exist even when dropout is off.
        if drop_out:
            self.drop_out = nn.Dropout(drop_out)
        else:
            self.drop_out = lambda x: x

    def forward(self, x):
        """Classify a batch ``x`` of shape (N, C, T, V, M); returns (N, num_class)."""
        N, C, T, V, M = x.size()

        # Normalise each (person, joint, channel) series over time.
        x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)
        x = self.data_bn(x)
        # Fold the person axis into the batch: (N * M, C, T, V).
        x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V)

        for block in (self.l1, self.l2, self.l3, self.l4, self.l5,
                      self.l6, self.l7, self.l8, self.l9, self.l10):
            x = block(x)

        # x is (N * M, C', T', V): average over time and joints, then over persons.
        c_new = x.size(1)
        x = x.view(N, M, c_new, -1)
        x = x.mean(3).mean(1)
        x = self.drop_out(x)

        return self.fc(x)  # fully connected classifier
③ data_gen文件夹下,部分可对应4.4. Two-stream networks,
ntu_gendata.py[kinetics_gendata.py] (get data of joints)
# Zero-filled output buffer laid out as (N, C, T, V, M).
fp = np.zeros((len(sample_label), 3, max_frame, num_joint, max_body_true), dtype=np.float32)

for idx, name in enumerate(tqdm(sample_name)):
    # Per the read_xyz annotation in these notes, the result is laid out as
    # (3, numFrame, num_joint, max_body). kinetics_gendata.py obtains the
    # sample via `data, label = feeder[i]` instead.
    skeleton_path = os.path.join(data_path, name)
    data = read_xyz(skeleton_path, max_body=max_body_kinect, num_joint=num_joint)
    # Copy the frames that exist; the remaining frames stay zero.
    fp[idx, :, :data.shape[1], :, :] = data

fp = pre_normalization(fp)  # defined in preprocess.py; layout stays (N, C, T, V, M)
# Save the joint data (kinetics_gendata.py uses np.save(data_out_path, fp)).
np.save('{}/{}_data_joint.npy'.format(out_path, part), fp)
gen_bone_data.py (calculate the data of bones based on the data of joints)
# Derive bone data (differences between connected joints) from the joint data.
for dataset in datasets:  # e.g. 'ntu/xview', 'ntu/xsub'
    # `split` rather than `set`, so the builtin `set` is not shadowed.
    for split in sets:  # e.g. 'train', 'val'
        print(dataset, split)
        data = np.load('../data/{}/{}_data_joint.npy'.format(dataset, split))  # load joint data
        N, C, T, V, M = data.shape
        # Create the memory-mapped .npy file that will hold the bone data.
        fp_sp = open_memmap(
            '../data/{}/{}_data_bone.npy'.format(dataset, split),
            dtype='float32',
            mode='w+',
            shape=(N, 3, T, V, M))

        # Start from the joint coordinates; joints not listed as v1 keep them.
        fp_sp[:, :C, :, :, :] = data
        # `paris` maps each dataset to its (v1, v2) joint connection pairs.
        for v1, v2 in tqdm(paris[dataset]):
            if dataset != 'kinetics':
                # Non-kinetics pairs are 1-based (1..25 -> 0..24).
                v1 -= 1
                v2 -= 1
            # Bone vector: carries both length and direction information.
            fp_sp[:, :, :, v1, :] = data[:, :, :, v1, :] - data[:, :, :, v2, :]
merge_joint_bone_data.py
# Concatenate joint and bone data into one array per dataset/split.
for dataset in datasets:
    # `split` rather than `set`, so the builtin `set` is not shadowed.
    for split in sets:
        print(dataset, split)
        data_jpt = np.load('../data/{}/{}_data_joint.npy'.format(dataset, split))  # joints
        data_bone = np.load('../data/{}/{}_data_bone.npy'.format(dataset, split))  # bones
        # Axis 1 is the channel axis of the (N, C, T, V, M) layout, so the
        # result carries the joint channels followed by the bone channels.
        data_jpt_bone = np.concatenate((data_jpt, data_bone), axis=1)
        np.save('../data/{}/{}_data_joint_bone.npy'.format(dataset, split), data_jpt_bone)  # joint + bone
gen_motion_data.py # gen_motion_data.py处理得到的temporal edges没用到,temporal edges只是为了后边时间上的卷积即可
# Derive motion data (frame-to-frame differences) for every dataset/split/part.
for dataset in datasets:
    # `split` rather than `set`, so the builtin `set` is not shadowed.
    for split in sets:  # training / validation
        for part in parts:  # joint / bone
            print(dataset, split, part)
            data = np.load('../data/{}/{}_data_{}.npy'.format(dataset, split, part))
            N, C, T, V, M = data.shape
            # The output has the same channel count as the input; a
            # hard-coded 3 would break the assignments below when C != 3.
            fp_sp = open_memmap(
                '../data/{}/{}_data_{}_motion.npy'.format(dataset, split, part),
                dtype='float32',
                mode='w+',
                shape=(N, C, T, V, M))
            for t in tqdm(range(T - 1)):
                # Displacement of each joint between consecutive frames.
                fp_sp[:, :, t, :, :] = data[:, :, t + 1, :, :] - data[:, :, t, :, :]
            # T frames give only T - 1 differences, so the last frame is zero.
            fp_sp[:, :, T - 1, :, :] = 0
④ main.py
class GradualWarmupScheduler(_LRScheduler):
def init_seed(_):
def get_parser():
class Processor():
def __init__(self, arg):
def load_data(self):
def load_model(self):
Model = import_class(self.arg.model)
def load_optimizer(self):
def save_arg(self):
def adjust_learning_rate(self, epoch):
def print_time(self):
def print_log(self, str, print_time=True):
def record_time(self):
def split_time(self):
def train(self, epoch, save_model=False):
def eval(self, epoch, save_score=False, loader_name=['test'], wrong_file=None, result_file=None):
def start(self):
def str2bool(v):
def import_class(name):
    """Resolve a dotted path such as ``'feeder.Feeder'`` to the object it names.

    The first component is imported as a module and the remaining components
    are resolved one by one. A component that is not yet an attribute of its
    parent is imported as a submodule, so paths through packages whose
    ``__init__`` does not import the submodule still resolve.

    Args:
        name: dotted path, e.g. ``'feeders.feeder.Feeder'``.

    Returns:
        The module, class, function or other object at that path.

    Raises:
        ImportError: if the first component cannot be imported.
        AttributeError: if a later component is neither an attribute nor an
            importable submodule.
    """
    import importlib

    components = name.split('.')
    dotted = components[0]
    target = importlib.import_module(dotted)
    for comp in components[1:]:
        dotted = '{}.{}'.format(dotted, comp)
        try:
            target = getattr(target, comp)
        except AttributeError:
            # Not loaded as an attribute yet: it may be a submodule that the
            # parent package does not import on its own.
            try:
                target = importlib.import_module(dotted)
            except ImportError as exc:
                raise AttributeError(
                    'cannot resolve {!r} while importing {!r}'.format(dotted, name)
                ) from exc
    return target
⑤ README.md (4.4. Two-stream networks)
Preprocess the data with #先对数据进行处理,得到关节数据
python data_gen/ntu_gendata.py
python data_gen/kinetics_gendata.py
Generate the bone data with: #关节数据转换为骨骼数据
python data_gen/gen_bone_data.py
Change the config file depending on what you want. #分别将关节和骨骼的时空数据送入J-stream 和 B-stream,训练
`python main.py --config ./config/nturgbd-cross-view/train_joint.yaml`
`python main.py --config ./config/nturgbd-cross-view/train_bone.yaml`
To ensemble the results of joints and bones, run test firstly to generate the scores of the softmax layer. #测试,产生各自softmax分数
`python main.py --config ./config/nturgbd-cross-view/test_joint.yaml`
`python main.py --config ./config/nturgbd-cross-view/test_bone.yaml`
Then combine the generated scores with: #两个softmax分数相加 to obtain the fused score and predict the action label
`python ensemble.py --datasets ntu/xview`
rotation.py
import numpy as np
import math
def rotation_matrix(axis, theta):
    """Return the 3x3 matrix for a counterclockwise rotation about ``axis``.

    Args:
        axis: 3-vector giving the rotation axis; it need not be unit length.
        theta: rotation angle in radians.

    Returns:
        A (3, 3) array ``R`` such that ``np.dot(R, v)`` is ``v`` rotated by
        ``theta`` about ``axis``. The identity is returned when the axis or
        the angle is (numerically) zero, since no rotation is defined or
        needed in that case.
    """
    axis = np.asarray(axis, dtype=float)
    if np.abs(axis).sum() < 1e-6 or abs(theta) < 1e-6:
        return np.eye(3)
    kx, ky, kz = axis / np.linalg.norm(axis)
    # Rodrigues' formula: R = cos(t) I + sin(t) K + (1 - cos(t)) k k^T,
    # where K is the cross-product matrix of the unit axis k.
    cross = np.array([[0.0, -kz, ky],
                      [kz, 0.0, -kx],
                      [-ky, kx, 0.0]])
    unit = np.array([kx, ky, kz])
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    return cos_t * np.eye(3) + sin_t * cross + (1.0 - cos_t) * np.outer(unit, unit)
def unit_vector(vector):
    """Return ``vector`` scaled to unit length (divided by its default 2-norm)."""
    length = np.linalg.norm(vector)
    return vector / length
def angle_between(v1, v2):
    """Return the angle in radians between vectors ``v1`` and ``v2``.

    Both vectors are normalised and the arccosine of their dot product is
    taken. The dot product is clipped to [-1, 1] first, so floating-point
    overshoot cannot make ``arccos`` return NaN. If either vector is
    (numerically) zero the angle is undefined and 0 is returned.
    """
    v1 = np.asarray(v1, dtype=float)
    v2 = np.asarray(v2, dtype=float)
    if np.abs(v1).sum() < 1e-6 or np.abs(v2).sum() < 1e-6:
        return 0
    v1_u = v1 / np.linalg.norm(v1)
    v2_u = v2 / np.linalg.norm(v2)
    return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
def x_rotation(vector, theta):
    """Rotate a 3-D ``vector`` about the x-axis by ``theta`` radians."""
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # Counterclockwise rotation about x: the x component is unchanged.
    R = np.array([[1, 0, 0],
                  [0, cos_t, -sin_t],
                  [0, sin_t, cos_t]])
    return np.dot(R, vector)
def y_rotation(vector, theta):
    """Rotate a 3-D ``vector`` about the y-axis by ``theta`` radians."""
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # Counterclockwise rotation about y: the y component is unchanged.
    R = np.array([[cos_t, 0, sin_t],
                  [0, 1, 0],
                  [-sin_t, 0, cos_t]])
    return np.dot(R, vector)
def z_rotation(vector, theta):
    """Rotate a 3-D ``vector`` about the z-axis by ``theta`` radians."""
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # Counterclockwise rotation about z: the z component is unchanged.
    R = np.array([[cos_t, -sin_t, 0],
                  [sin_t, cos_t, 0],
                  [0, 0, 1]])
    return np.dot(R, vector)
1. vector / np.linalg.norm(vector) 向量vector/(默认,根号下每个元素的平方)二范数 -> 单位向量
2. np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)) np.dot:v1_u, v2_u对应项相乘再相加(即两个单位向量夹角的余弦);np.clip:把该值限制在[-1.0, 1.0]内,超出则等于边界值(避免浮点误差使arccos返回nan)。 np.arccos返回的是弧度值
preprocess.py
import sys
sys.path.extend(['../'])
from data_gen.rotation import *
from tqdm import tqdm #进度条
def pre_normalization(data, zaxis=[0, 1], xaxis=[8, 4]):
#用前面的帧填充空帧
#减去中心关节#1(ntu中的脊柱关节, Kinetics中的颈部关节)
#将第一人的髋关节(jpt 0)和脊椎(jpt 1)之间的骨骼旋转到与z轴平行; 旋转轴由np.cross求叉积(向量积)得到, 求出的新向量垂直于该骨骼和z轴
#将右肩(jpt 8)和左肩(jpt 4)之间的骨骼旋转到与x轴平行
return data
3.左侧 project 工具栏窗口顶部那个齿轮有个 show member 选项,默认是不开的,勾选后 py 文件会显示内部定义的 class 和 def。每个文件可以自由选择折叠还是展开。
4. 关于tqdm
from tqdm import tqdm 进度条
a=(-1,1,0)
for i,j in enumerate(tqdm(a)):
print(i,j)
>>>
0%| | 0/3 [00:00, ?it/s]
0 -1
1 1
2 0
100%|███████████████████████████████████████████| 3/3 [00:00<00:00, 7281.78it/s]
5. axis = np.cross(joint_top - joint_bottom, [0, 0, 1]) np.cross(a,b)求叉积(向量积);求出的新向量垂直于 a,b形成的平面
6. np.sum()是求所有元素的总和;np.sum(-1)是沿最后一个维度求和(即先对最内层每个[]里的元素求和)
>>> np.eye(3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
>>> np.eye(3).sum(-1)
array([1., 1., 1.])
>>> np.eye(3).sum(-1).sum(-1)
3.0
>>> np.eye(3).sum()
3.0
>>> np.array([[1,1],[2,2]]).sum(-1)
array([2, 4])
>>> np.array([[1,1],[2,2]]).sum()
6
>>> np.array([[1,1],[2,2]]).sum(0)
array([3, 3])
>>> np.array([[1,1],[2,2]]).sum(1)
array([2, 4])
ntu_gendata.py
import argparse
import pickle
from tqdm import tqdm
import sys
sys.path.extend(['../'])
from data_gen.preprocess import pre_normalization
import numpy as np
import os
def read_skeleton_filter(file): #每一个帧,每个人,每一个关节
return skeleton_sequence
def get_nonzero_std(s): # tvc
return s
def read_xyz(file, max_body=4, num_joint=25): # 取了前两个body
return data #3,seq_info['numFrame'],num_joint,max_body
def gendata(data_path, out_path, ignored_sample_path=None, benchmark='xview', part='eval'):
fp = pre_normalization(fp) #N, C, T, V, M
7. index = energy.argsort()[::-1][0:max_body_true]
argsort返回的是数组值从小到大的索引值;[::-1]取从后向前的元素;max_body_true=2(定义好的);;
argsort(axis=1)表示按行排列
8. os.listdir(data_path): 返回指定路径下的文件和文件夹列表
kinetics_gendata.py
class Feeder_kinetics(Dataset):
def __init__(self,
data_path,
label_path,
ignore_empty_sample=True,
window_size=-1,
num_person_in=5,
num_person_out=2):
def load_data(self):
def __len__(self):
return len(self.sample_name)
def __iter__(self):
return self
def __getitem__(self, index):
return data_numpy, label
def gendata(data_path, label_path,
data_out_path, label_out_path,
num_person_in=num_person_in, # observe the first 5 persons
num_person_out=num_person_out, # then choose 2 persons with the highest score
max_frame=max_frame):
9. data_numpy[1, frame_index, :, m] = pose[1::2] [a::b]从下标为a的元素开始,以步长b取元素(每b个取一个);
若b=-1,表示从下标a开始向前逆序取元素(a省略时从最后一个元素开始)。
main.py
10. super().__init__(optimizer) optimizer是 _LRScheduler继承类 的输入
11. answer = input('delete it? y/n:') 接受一个标准输入数据,返回为 string 类型
12. Python __import__() 函数用于动态加载类和函数
getattr(object, name[, default]) 函数用于返回一个对象属性值。等效于object.name
*self._args 表示接受元组类参数;
**kwargs 表示接受字典类参数;
13. vars([object]) 函数返回对象object的属性和属性值的字典对象
14. localtime = time.asctime(time.localtime(time.time()))
localtime格式化时间戳为本地的时间;asctime() 函数接受时间元组并返回一个可读的形式为"Tue Dec 11 18:07:14 2008"(2008年12月11日 周二18时07分14秒)的24个字符的字符串。
15. open('{}/log.txt'.format(self.arg.work_dir), 'a') as f: a代表追加,也就是说,打开这个文件之后直接定位到文件的末尾。
16. round(v * 100 / sum(timer.values())) round() 方法返回最接近浮点数v * 100 / sum(timer.values())的整数(近似四舍五入;Python 3 中小数部分恰为 .5 时取最近的偶数)。
17. value, predict_label = torch.max(output.data, 1) value是每行的最大值,predict_label是对应的索引
>>> import torch
>>> import numpy as np
>>> c=np.array([[1,2],[4,3]])
>>> print(c)
[[1 2]
[4 3]]
>>> b=torch.from_numpy(c)
>>> print(b)
tensor([[1, 2],
[4, 3]])
>>> torch.max(b,1) #b是softmax函数输出的一个tensor,1是每行的最大值(axis)
torch.return_types.max(
values=tensor([2, 4]),
indices=tensor([1, 0]))
feeders/tools.py
18. begin = np.random.randint(step) 取[0, step)的随机整数
19. begin = valid_frame.argmax() 返回的是 元素最大值所对应的索引值
20. move_time = random.choice(move_time_candidate) choice() 方法返回一个列表,元组或字符串的随机项。
21. np.arange函数
node = np.arange(0, T, T * 1.0 / move_time).round().astype(int) #round() 方法返回浮点数 的四舍五入值。 np.arange :[0, T)(不含T),步长为T * 1.0 / move_time
node = np.append(node, T) #为node添加T
A = np.random.choice(angle_candidate, num_node) #angle_candidate中选num_node个(注意是np.)
22. self.sample_name, self.label = pickle.load(f, encoding='latin1')
用python2.X pickle写了一个文件,用python3的pickle读取时, 加上encoding='latin1',代码就可以正确识别编码输出内容了。
23. data.mean(axis=2, keepdims=True) 求均值,axis表示维度,keepdims=True表示保持原来维度
[-top_k:]表示倒数top_k个
24. data = data.reshape((1,) + data.shape) #np.array([1,2,3,4]).reshape((2,)+(2,)) -> array([[1, 2], [3, 4]])
agcn.py
25. self.PA = torch.nn.Parameter(torch.from_numpy(A.astype(np.float32)))
将一个不可训练的类型Tensor转换成可以训练的类型parameter,成为了模型中根据训练可以改动的参数
26. self.bn = nn.BatchNorm2d(out_channels) ; self.relu = nn.ReLU(inplace=True)
inplace=True从上层网络bn中传递下来的tensor直接进行修改,这样能够节省运算内存,不用多存储其他变量。