#
# Author: 韦访
# Blog: https://blog.csdn.net/rookie_wei
# WeChat: 1007895847
# When adding me on WeChat, please mention you came from CSDN
# Everyone is welcome to learn together
#
In this installment, we walk through the source code for running pose estimation. Please read it alongside the earlier posts:
https://blog.csdn.net/rookie_wei/article/details/90551331
https://blog.csdn.net/rookie_wei/article/details/90705880
https://blog.csdn.net/rookie_wei/article/details/93658329
https://blog.csdn.net/rookie_wei/article/details/94655693
https://blog.csdn.net/rookie_wei/article/details/94660075
Long story short, let's look at the overall structure, starting from the entry block (if __name__ == '__main__':) of run_webcam.py:
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation realtime webcam')
    parser.add_argument('--camera', type=int, default=0)

    parser.add_argument('--resize', type=str, default='0x0',
                        help='if provided, resize images before they are processed. default=0x0, Recommends : 432x368 or 656x368 or 1312x736 ')
    parser.add_argument('--resize-out-ratio', type=float, default=4.0,
                        help='if provided, resize heatmaps before they are post-processed. default=4.0')
    parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
    parser.add_argument('--show-process', type=bool, default=False,
                        help='for debug purpose, if enabled, speed for inference is dropped.')
    args = parser.parse_args()
These lines just set up the command-line arguments; nothing to explain. Moving on:
    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resize)
    if w > 0 and h > 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    else:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368))
The code above instantiates the TfPoseEstimator class, which is what actually performs the pose estimation. This class is important, but we won't dig into it just yet; for now, just remember that it does the detection. One small aside first: the model_wh call above.
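model_wh simply parses the --resize string 'WxH' into a pair of integers. A minimal sketch of what it does (paraphrased from the repo's tf_pose/networks.py; treat the details as approximate):

def model_wh(resolution_str):
    # parse a 'WxH' string such as '432x368' into (width, height)
    width, height = map(int, resolution_str.split('x'))
    if width % 16 != 0 or height % 16 != 0:
        raise Exception('Width and height should be multiples of 16. w=%d, h=%d' % (width, height))
    return int(width), int(height)

With the default '0x0' it returns (0, 0), so the else branch above falls back to target_size=(432, 368). Now back to the main flow: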
    logger.debug('cam read+')
    cam = cv2.VideoCapture(args.camera)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))
The code above opens the webcam via OpenCV and reads a frame from it. Moving on:
    while True:
        ret_val, image = cam.read()

        logger.debug('image process+')
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=args.resize_out_ratio)

        logger.debug('postprocess+')
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        logger.debug('show+')
        # fps_time is initialized to 0 at module level in the original file
        cv2.putText(image,
                    "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 255, 0), 2)
        cv2.imshow('tf-pose-estimation result', image)
        fps_time = time.time()
        if cv2.waitKey(1) == 27:
            break
        logger.debug('finished+')

    cv2.destroyAllWindows()
The code above runs pose estimation frame by frame, draws the detected skeletons onto the image, and overlays the frame rate. It's fairly simple and was covered in the first post, so I won't repeat it here; if you haven't read it, the link is:
https://blog.csdn.net/rookie_wei/article/details/90551331
Now let's look at the TfPoseEstimator class, starting with how it is instantiated:
e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
First, what does the get_graph_path function do?
def get_graph_path(model_name):
    dyn_graph_path = {
        'cmu': 'graph/cmu/graph_opt.pb',
        'openpose_quantize': 'graph/cmu/graph_opt_q.pb',
        'mobilenet_thin': 'graph/mobilenet_thin/graph_opt.pb',
        'mobilenet_v2_large': 'graph/mobilenet_v2_large/graph_opt.pb',
        'mobilenet_v2_large_r0.5': 'graph/mobilenet_v2_large/graph_r0.5_opt.pb',
        'mobilenet_v2_large_quantize': 'graph/mobilenet_v2_large/graph_opt_q.pb',
        'mobilenet_v2_small': 'graph/mobilenet_v2_small/graph_opt.pb',
    }

    base_data_dir = dirname(dirname(abspath(__file__)))
    if os.path.exists(os.path.join(base_data_dir, 'models')):
        base_data_dir = os.path.join(base_data_dir, 'models')
    else:
        base_data_dir = os.path.join(base_data_dir, 'tf_pose_data')

    graph_path = os.path.join(base_data_dir, dyn_graph_path[model_name])
    if os.path.isfile(graph_path):
        return graph_path

    raise Exception('Graph file doesn\'t exist, path=%s' % graph_path)
The code above is straightforward: given a model_name, it returns the path to the corresponding .pb (frozen graph) file.
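For example, a quick hypothetical usage (the exact absolute path depends on where you cloned the repo):

print(get_graph_path('mobilenet_thin'))
# e.g. /path/to/tf-pose-estimation/models/graph/mobilenet_thin/graph_opt.pb

Next, let's look at TfPoseEstimator's __init__ function: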
def __init__(self, graph_path, target_size=(320, 240), tf_config=None):
    self.target_size = target_size

    # load graph
    logger.info('loading graph from %s(default size=%dx%d)' % (graph_path, target_size[0], target_size[1]))
    with tf.gfile.GFile(graph_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    self.graph = tf.get_default_graph()
    tf.import_graph_def(graph_def, name='TfPoseEstimator')
    self.persistent_sess = tf.Session(graph=self.graph, config=tf_config)

    # for op in self.graph.get_operations():
    #     print(op.name)
    # for ts in [n.name for n in tf.get_default_graph().as_graph_def().node]:
    #     print(ts)

    self.tensor_image = self.graph.get_tensor_by_name('TfPoseEstimator/image:0')
    self.tensor_output = self.graph.get_tensor_by_name('TfPoseEstimator/Openpose/concat_stage7:0')
The code above loads the .pb file passed in and then fetches the image:0 and Openpose/concat_stage7:0 tensors; these are the input and output of the CMU network model analyzed in the previous post. Moving on:
    self.tensor_heatMat = self.tensor_output[:, :, :, :19]   # [batch, height, width, channels]
    self.tensor_pafMat = self.tensor_output[:, :, :, 19:]
From the paper we know the output consists of two maps: a 19-channel keypoint confidence map (heatMat) and a 38-channel part affinity field (pafMat) describing the limbs. The network's output layer concatenates the two along the channel axis, so the code above simply splits them back apart into the heatMat and pafMat tensors.
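As a quick shape check, here is a small numpy sketch; the 46x54 spatial size is my assumption for a 432x368 input, since the maps come out at 1/8 resolution:

import numpy as np

# assumed network output: batch 1, 46x54 grid, 19 + 38 = 57 channels
net_out = np.zeros((1, 46, 54, 57), dtype=np.float32)
heatMat = net_out[:, :, :, :19]   # 18 keypoints + 1 background channel
pafMat = net_out[:, :, :, 19:]    # 19 limbs x 2 components (x and y) = 38 channels
print(heatMat.shape, pafMat.shape)  # (1, 46, 54, 19) (1, 46, 54, 38)

Moving on through __init__: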
    # upsample the output maps
    self.upsample_size = tf.placeholder(dtype=tf.int32, shape=(2,), name='upsample_size')
    self.tensor_heatMat_up = tf.image.resize_area(self.tensor_output[:, :, :, :19], self.upsample_size,
                                                  align_corners=False, name='upsample_heatmat')
    self.tensor_pafMat_up = tf.image.resize_area(self.tensor_output[:, :, :, 19:], self.upsample_size,
                                                 align_corners=False, name='upsample_pafmat')
The code above enlarges the two output maps to upsample_size; as we'll see in inference below, the raw maps are at 1/8 of the network's input resolution, so they are upsampled before post-processing. Moving on:
    # Gaussian-smoothed heatmap
    smoother = Smoother({'data': self.tensor_heatMat_up}, 25, 3.0)
    gaussian_heatMat = smoother.get_output()

    # max pooling
    max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat, window_shape=(3, 3), pooling_type='MAX', padding='SAME')
As covered in an earlier post, the keypoint confidence maps used for training are generated from COCO keypoint annotations with a Gaussian kernel, so what the network predicts is likewise a Gaussian-like blob of high values around each keypoint rather than a single coordinate. Each keypoint is therefore a patch of many pixels, and the code above is the machinery that reduces each blob to one concrete point.
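To make that concrete, here is a minimal sketch (the sigma value is my assumption) of how one keypoint becomes a Gaussian blob in a confidence map:

import numpy as np

# a keypoint at (cx, cy) becomes a Gaussian blob in the confidence map,
# peaking at 1.0 on the keypoint and falling off around it
def gaussian_heatmap(h, w, cx, cy, sigma=3.0):
    ys, xs = np.mgrid[0:h, 0:w]
    return np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2.0 * sigma ** 2))

heat = gaussian_heatmap(46, 54, cx=20, cy=30)
print(heat[30, 20], heat[30, 26])   # 1.0 at the keypoint, ~0.14 six pixels away

Now let's see how the code collapses such blobs back to single points, starting with the Smoother class and its __init__ function: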
def __init__(self, inputs, filter_size, sigma):
    self.inputs = inputs
    self.terminals = []
    self.layers = dict(inputs)
    self.filter_size = filter_size
    self.sigma = sigma
    self.setup()
It just stores a few values; nothing interesting. Look at the setup function instead:
def setup(self):
    self.feed('data').conv(name='smoothing')
Now the conv function:
@layer
def conv(self,
         input,
         name,
         padding='SAME'):
    # Get the number of channels in the input
    c_i = input.get_shape().as_list()[3]

    # Convolution for a given input and kernel
    convolve = lambda i, k: tf.nn.depthwise_conv2d(i, k, [1, 1, 1, 1], padding=padding)

    with tf.variable_scope(name) as scope:
        kernel = self.make_gauss_var('gauss_weight', self.filter_size, self.sigma, c_i)
        output = convolve(input, kernel)
        return output
The make_gauss_var function above generates the Gaussian kernel, and tf.nn.depthwise_conv2d performs a depthwise convolution. Why depthwise rather than a regular convolution? A regular convolution sums over the input channels, so no matter how many feature maps go in, each filter collapses them into a single map; here we need the 19 heatmap channels to stay separate, each one smoothed independently. For background on depthwise convolution, see:
https://blog.csdn.net/mao_xiao_feng/article/details/78003476
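A toy shape check of that claim (TF 1.x style to match the rest of the code; the 46x54x19 shape is just an assumption):

import tensorflow as tf

# depthwise_conv2d applies one kernel per input channel and keeps the
# channels separate: 19 heatmap channels in, 19 smoothed channels out
x = tf.placeholder(tf.float32, [1, 46, 54, 19])
k = tf.ones([3, 3, 19, 1]) / 9.0            # one 3x3 averaging kernel per channel
y = tf.nn.depthwise_conv2d(x, k, [1, 1, 1, 1], padding='SAME')
print(y.shape)                              # (1, 46, 54, 19)

# a regular conv2d with a single filter would instead sum over channels:
k2 = tf.ones([3, 3, 19, 1]) / 9.0
y2 = tf.nn.conv2d(x, k2, [1, 1, 1, 1], padding='SAME')
print(y2.shape)                             # (1, 46, 54, 1)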
Back to TfPoseEstimator's __init__ function:
    # tf.where(condition, a, b):
    # a and b are tensors of the same shape as condition; where condition is
    # True the result takes the value from a, where it is False it takes the
    # value from b
    self.tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor), gaussian_heatMat, tf.zeros_like(gaussian_heatMat))
    self.heatMat = self.pafMat = None
After this, tensor_peaks is zero everywhere except at the keypoint peaks: a pixel survives only if it equals the maximum of its own 3x3 neighborhood, which is exactly non-maximum suppression. Moving on:
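The same peak extraction can be sketched in a few lines of numpy (my paraphrase, using scipy's maximum_filter instead of tf.nn.pool; the 0.05 threshold matches the THRESH_HEAT constant used in the C++ code later):

import numpy as np
from scipy.ndimage import maximum_filter

def find_peaks(heat, thresh=0.05):
    # a pixel is a peak if it equals the maximum of its 3x3 neighborhood
    # and is above the threshold - i.e. non-maximum suppression
    local_max = maximum_filter(heat, size=3)
    ys, xs = np.where((heat == local_max) & (heat > thresh))
    return list(zip(xs, ys))

heat = np.random.rand(46, 54).astype(np.float32)
print(find_peaks(heat)[:5])   # a few (x, y) peak coordinates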
    # warm-up
    # initialize any variables that are still uninitialized
    self.persistent_sess.run(tf.variables_initializer(
        [v for v in tf.global_variables() if
         v.name.split(':')[0] in [x.decode('utf-8') for x in
                                  self.persistent_sess.run(tf.report_uninitialized_variables())]
         ])
    )
That's just variable initialization, nothing special. The code that follows in __init__ only runs a few dummy inferences to check that the network works, so we'll skip it.
Back to run_webcam.py's entry block: let's look at what e.inference does. This is the crucial function; it is what produces the human skeleton data.
def inference(self, npimg, resize_to_default=True, upsample_size=1.0):
    if npimg is None:
        raise Exception('The image is not valid. Please check your image exists.')

    if resize_to_default:
        upsample_size = [int(self.target_size[1] / 8 * upsample_size), int(self.target_size[0] / 8 * upsample_size)]
    else:
        upsample_size = [int(npimg.shape[0] / 8 * upsample_size), int(npimg.shape[1] / 8 * upsample_size)]

    if self.tensor_image.dtype == tf.quint8:
        # quantize input image
        npimg = TfPoseEstimator._quantize_img(npimg)
        pass

    print("upsample_size:", upsample_size)
    logger.debug('inference+ original shape=%dx%d' % (npimg.shape[1], npimg.shape[0]))
    img = npimg
    if resize_to_default:
        img = self._get_scaled_img(npimg, None)[0][0]

    # fetch the peak map, the heatmap and the PAF map
    peaks, heatMat_up, pafMat_up = self.persistent_sess.run(
        [self.tensor_peaks, self.tensor_heatMat_up, self.tensor_pafMat_up], feed_dict={
            self.tensor_image: [img], self.upsample_size: upsample_size
        })
The sess.run above returns the peak map (the confidence map after Gaussian smoothing and non-maximum suppression), the upsampled keypoint confidence map, and the upsampled limb part affinity map. Moving on:
    peaks = peaks[0]
    self.heatMat = heatMat_up[0]
    self.pafMat = pafMat_up[0]
    logger.debug('inference- heatMat=%dx%d pafMat=%dx%d' % (
        self.heatMat.shape[1], self.heatMat.shape[0], self.pafMat.shape[1], self.pafMat.shape[0]))

    t = time.time()
    # (360, 640, 19)
    print(peaks.shape)
    # (360, 640, 19)
    print(self.heatMat.shape)
    # (360, 640, 38)
    print(self.pafMat.shape)
    # exit()
    # CocoPose.display_image(npimg, self.heatMat.astype(np.float32), self.pafMat.astype(np.float32))
    humans = PoseEstimator.estimate_paf(peaks, self.heatMat, self.pafMat)
    logger.debug('estimate time=%.5f' % (time.time() - t))
    return humans
These three maps are then handed to PoseEstimator.estimate_paf. Let's look at that function:
@staticmethod
def estimate_paf(peaks, heat_mat, paf_mat):
    pafprocess.process_paf(peaks, heat_mat, paf_mat)

    humans = []
    for human_id in range(pafprocess.get_num_humans()):
        human = Human([])
        is_added = False

        # fetch each body part of the current person
        # for part_idx in range(18):
        for part_idx in range(14):
            c_idx = int(pafprocess.get_part_cid(human_id, part_idx))
            if c_idx < 0:
                continue

            is_added = True
            human.body_parts[part_idx] = BodyPart(
                '%d-%d' % (human_id, part_idx), part_idx,
                float(pafprocess.get_part_x(c_idx)) / heat_mat.shape[1],
                float(pafprocess.get_part_y(c_idx)) / heat_mat.shape[0],
                pafprocess.get_part_score(c_idx)
            )

        if is_added:
            score = pafprocess.get_score(human_id)
            human.score = score
            humans.append(human)

    return humans
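Before diving into process_paf, here is a hedged usage sketch (mine, not from the repo) of how the humans returned by estimate_paf can be consumed. Since BodyPart.x and BodyPart.y are normalized to [0, 1] by the divisions above, they must be scaled back to pixel coordinates:

# assumes `image` and `humans` from the webcam loop in run_webcam.py
image_h, image_w = image.shape[:2]
for human in humans:
    for part_idx, body_part in human.body_parts.items():
        x = int(body_part.x * image_w)   # back to pixel coordinates
        y = int(body_part.y * image_h)
        print('part %d at (%d, %d), score=%.2f' % (part_idx, x, y, body_part.score))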
The key call in estimate_paf is pafprocess.process_paf(peaks, heat_mat, paf_mat). After it runs, we know how many people are in the image, the coordinates of their keypoints, which keypoints connect into which limbs of which person, and so on; all the data we want is there. Let's look at this function:
def process_paf(p1, h1, f1):
    return _pafprocess.process_paf(p1, h1, f1)
This wrapper lives in tf_pose/pafprocess/pafprocess.py, but searching that file for an implementation of process_paf turns up nothing. A global search for the process_paf keyword shows the real implementation is in C++, in pafprocess.cpp (the Python module is generated from it by SWIG). Let's look at the function:
int process_paf(int p1, int p2, int p3, float *peaks, int h1, int h2, int h3, float *heatmap, int f1, int f2, int f3, float *pafmap) {
    // const int THRE_CNT = 4;
    // const double THRESH_PAF = 0.40;

    // 18 body parts are detected: NUM_PART = 18
    vector<Peak> peak_infos[NUM_PART];
    int peak_cnt = 0;

    // parse peaks
    // part_id: body-part index, y: row, x: column
    for (int part_id = 0; part_id < NUM_PART; part_id ++) {
        for (int y = 0; y < p1; y ++) {
            for (int x = 0; x < p2; x ++) {
                // THRESH_HEAT = 0.05
                // PEAKS(y, x, part_id) is pixel (x, y) of the part_id-th peak map
                if (PEAKS(y, x, part_id) > THRESH_HEAT) {
                    Peak info;
                    info.id = peak_cnt++;
                    info.x = x;
                    info.y = y;
                    info.score = HEAT(y, x, part_id);
                    peak_infos[part_id].push_back(info);
                }
            }
        }
    }

    // flatten all peaks into one list indexed by peak id
    peak_infos_line.clear();
    for (int part_id = 0; part_id < NUM_PART; part_id ++) {
        for (int i = 0; i < (int) peak_infos[part_id].size(); i ++) {
            peak_infos_line.push_back(peak_infos[part_id][i]);
        }
    }
    // Start to Connect
    // COCOPAIRS_SIZE = 19 limb pairs
    vector<Connection> connection_all[COCOPAIRS_SIZE];
    for (int pair_id = 0; pair_id < COCOPAIRS_SIZE; pair_id ++) {
        vector<ConnectionCandidate> candidates;
        // COCOPAIRS holds the limb connection pairs.
        // e.g. pair_id = 0 maps to COCOPAIRS entry {1, 2}, the limb from the
        // neck to the right shoulder; peak_a_list then holds the neck peaks
        // and peak_b_list the right-shoulder peaks
        vector<Peak>& peak_a_list = peak_infos[COCOPAIRS[pair_id][0]];
        vector<Peak>& peak_b_list = peak_infos[COCOPAIRS[pair_id][1]];

        if (peak_a_list.size() == 0 || peak_b_list.size() == 0) {
            continue;
        }

        // iterate over all candidate start points A
        for (int peak_a_id = 0; peak_a_id < (int) peak_a_list.size(); peak_a_id ++) {
            // the start point a
            Peak& peak_a = peak_a_list[peak_a_id];

            // iterate over all candidate end points B to find the best match for a
            for (int peak_b_id = 0; peak_b_id < (int) peak_b_list.size(); peak_b_id ++) {
                // the end point b
                Peak& peak_b = peak_b_list[peak_b_id];

                // calculate vector(direction)
                // the vector from a to b
                VectorXY vec;
                vec.x = peak_b.x - peak_a.x;
                vec.y = peak_b.y - peak_a.y;
                // the norm (length) of the vector
                float norm = (float) sqrt(vec.x * vec.x + vec.y * vec.y);
                // skip if the two peaks are (almost) the same point
                if (norm < 1e-12) continue;
                // normalize to a unit vector (direction cosines)
                vec.x = vec.x / norm;
                vec.y = vec.y / norm;

                vector<VectorXY> paf_vecs = get_paf_vectors(pafmap, COCOPAIRS_NET[pair_id][0], COCOPAIRS_NET[pair_id][1], f2, f3, peak_a, peak_b);
                float scores = 0.0f;

                // criterion 1 : score threshold count
                int criterion1 = 0;
                for (int i = 0; i < STEP_PAF; i ++) {
                    // dot product of the unit vector a->b and the sampled PAF
                    // vector; it equals 1 when the two directions agree exactly
                    float score = vec.x * paf_vecs[i].x + vec.y * paf_vecs[i].y;
                    scores += score;

                    // THRESH_VECTOR_SCORE = 0.05
                    if (score > THRESH_VECTOR_SCORE) criterion1 += 1;
                }

                // average alignment, penalized when the limb is longer than half the heatmap height
                float criterion2 = scores / STEP_PAF + min(0.0, 0.5 * h1 / norm - 1.0);

                // THRESH_VECTOR_CNT1 = 6
                if (criterion1 > THRESH_VECTOR_CNT1 && criterion2 > 0) {
                    ConnectionCandidate candidate;
                    candidate.idx1 = peak_a_id;
                    candidate.idx2 = peak_b_id;
                    candidate.score = criterion2;
                    candidate.etc = criterion2 + peak_a.score + peak_b.score;
                    candidates.push_back(candidate);
                }
            }
        }
        vector<Connection>& conns = connection_all[pair_id];
        sort(candidates.begin(), candidates.end(), comp_candidate);
        for (int c_id = 0; c_id < (int) candidates.size(); c_id ++) {
            ConnectionCandidate& candidate = candidates[c_id];
            bool assigned = false;
            for (int conn_id = 0; conn_id < (int) conns.size(); conn_id ++) {
                if (conns[conn_id].peak_id1 == candidate.idx1) {
                    // already assigned
                    assigned = true;
                    break;
                }
                if (assigned) break;
                if (conns[conn_id].peak_id2 == candidate.idx2) {
                    // already assigned
                    assigned = true;
                    break;
                }
                if (assigned) break;
            }
            if (assigned) continue;

            Connection conn;
            conn.peak_id1 = candidate.idx1;
            conn.peak_id2 = candidate.idx2;
            conn.score = candidate.score;
            conn.cid1 = peak_a_list[candidate.idx1].id;
            conn.cid2 = peak_b_list[candidate.idx2].id;
            conns.push_back(conn);
        }
    }
    // Generate subset
    // each subset row describes one person: indices 0..17 hold the peak id of
    // each body part (-1 if missing), index 18 the total score, index 19 the
    // number of parts found
    subset.clear();
    for (int pair_id = 0; pair_id < COCOPAIRS_SIZE; pair_id ++) {
        vector<Connection>& conns = connection_all[pair_id];
        int part_id1 = COCOPAIRS[pair_id][0];
        int part_id2 = COCOPAIRS[pair_id][1];

        for (int conn_id = 0; conn_id < (int) conns.size(); conn_id ++) {
            int found = 0;
            int subset_idx1 = 0, subset_idx2 = 0;
            for (int subset_id = 0; subset_id < (int) subset.size(); subset_id ++) {
                if (subset[subset_id][part_id1] == conns[conn_id].cid1 || subset[subset_id][part_id2] == conns[conn_id].cid2) {
                    if (found == 0) subset_idx1 = subset_id;
                    if (found == 1) subset_idx2 = subset_id;
                    found += 1;
                }
            }

            if (found == 1) {
                if (subset[subset_idx1][part_id2] != conns[conn_id].cid2) {
                    subset[subset_idx1][part_id2] = conns[conn_id].cid2;
                    subset[subset_idx1][19] += 1;
                    subset[subset_idx1][18] += peak_infos_line[conns[conn_id].cid2].score + conns[conn_id].score;
                }
            } else if (found == 2) {
                // set to 2 if the two subsets share any body part (initialized to 0)
                int membership = 0;
                for (int subset_id = 0; subset_id < 18; subset_id ++) {
                    if (subset[subset_idx1][subset_id] > 0 && subset[subset_idx2][subset_id] > 0) {
                        membership = 2;
                    }
                }

                if (membership == 0) {
                    // the two subsets are disjoint: merge them into one person
                    for (int subset_id = 0; subset_id < 18; subset_id ++) subset[subset_idx1][subset_id] += (subset[subset_idx2][subset_id] + 1);
                    subset[subset_idx1][19] += subset[subset_idx2][19];
                    subset[subset_idx1][18] += subset[subset_idx2][18];
                    subset[subset_idx1][18] += conns[conn_id].score;
                    subset.erase(subset.begin() + subset_idx2);
                } else {
                    subset[subset_idx1][part_id2] = conns[conn_id].cid2;
                    subset[subset_idx1][19] += 1;
                    subset[subset_idx1][18] += peak_infos_line[conns[conn_id].cid2].score + conns[conn_id].score;
                }
            } else if (found == 0 && pair_id < 17) {
                // start a new person from this connection
                vector<float> row(20);
                for (int i = 0; i < 20; i ++) row[i] = -1;
                row[part_id1] = conns[conn_id].cid1;
                row[part_id2] = conns[conn_id].cid2;
                row[19] = 2;
                row[18] = peak_infos_line[conns[conn_id].cid1].score +
                          peak_infos_line[conns[conn_id].cid2].score +
                          conns[conn_id].score;
                subset.push_back(row);
            }
        }
    }

    // delete some rows
    for (int i = subset.size() - 1; i >= 0; i --) {
        if (subset[i][19] < THRESH_PART_CNT || subset[i][18] / subset[i][19] < THRESH_HUMAN_SCORE)
            subset.erase(subset.begin() + i);
    }

    return 0;
}
// called as (pafmap, COCOPAIRS_NET[pair_id][0], COCOPAIRS_NET[pair_id][1], f2, f3, peak_a, peak_b)
vector<VectorXY> get_paf_vectors(float *pafmap, const int& ch_id1, const int& ch_id2, int& f2, int& f3, Peak& peak1, Peak& peak2) {
    vector<VectorXY> paf_vectors;

    // split the segment from peak1 to peak2 into STEP_PAF equal steps
    const float STEP_X = (peak2.x - peak1.x) / float(STEP_PAF);
    const float STEP_Y = (peak2.y - peak1.y) / float(STEP_PAF);

    for (int i = 0; i < STEP_PAF; i ++) {
        // roundpaf rounds to the nearest integer;
        // compute the coordinates of the i-th sample point
        int location_x = roundpaf(peak1.x + i * STEP_X);
        int location_y = roundpaf(peak1.y + i * STEP_Y);

        // read the PAF x- and y-components at (location_y, location_x)
        VectorXY v;
        v.x = PAF(location_y, location_x, ch_id1);
        v.y = PAF(location_y, location_x, ch_id2);
        paf_vectors.push_back(v);
    }

    return paf_vectors;
}
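To tie the matching step together, here is a minimal numpy paraphrase (mine, not the repo's code) of what process_paf and get_paf_vectors compute for one candidate limb: sample STEP_PAF points along the segment from peak a to peak b, and dot the unit direction a->b with the PAF vector stored at each sample.

import numpy as np

STEP_PAF = 10
THRESH_VECTOR_SCORE = 0.05

def paf_score(paf_x, paf_y, a, b):
    # unit vector pointing from a to b
    vec = np.array([b[0] - a[0], b[1] - a[1]], dtype=np.float32)
    norm = np.linalg.norm(vec)
    if norm < 1e-12:
        return 0.0, 0
    vec /= norm
    scores, aligned = 0.0, 0
    for i in range(STEP_PAF):
        # i-th sample point on the segment a->b
        x = int(round(a[0] + i * (b[0] - a[0]) / STEP_PAF))
        y = int(round(a[1] + i * (b[1] - a[1]) / STEP_PAF))
        # dot product of the limb direction and the PAF vector at the sample
        s = vec[0] * paf_x[y, x] + vec[1] * paf_y[y, x]
        scores += s
        if s > THRESH_VECTOR_SCORE:
            aligned += 1
    return scores / STEP_PAF, aligned

# toy example: a PAF field that points straight along +x everywhere
paf_x = np.ones((46, 54), dtype=np.float32)
paf_y = np.zeros((46, 54), dtype=np.float32)
print(paf_score(paf_x, paf_y, (5, 10), (40, 10)))  # (1.0, 10): perfect alignment

The aligned count corresponds to criterion1 in the C++ above, and the averaged score feeds criterion2 (which adds an extra penalty for overly long limbs).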
I once wrote detailed notes on this part, but forgot to copy them out of Ubuntu before reinstalling the system, and they are gone now. It has been almost two months and I don't feel like redoing the analysis, so forgive my laziness and work through it yourselves. I have uploaded the annotated source code to the link below; download it if you are interested:
https://download.csdn.net/download/rookie_wei/11293042