下载配置protobuf
下载源码:
wget https://github.com/google/protobuf/archive/v3.4.1.tar.gz -O protobuf3.4.1.tar.gz
解压:sudo tar xvzf protobuf3.4.1.tar.gz
安装依赖工具:
sudo apt-get install autoconf
sudo apt-get install automake
sudo apt-get install libtool
进入解压后的文件夹(GitHub 源码包解压后的目录名为 protobuf-3.4.1): cd protobuf-3.4.1
生成配置文件(编译时保证网络链接,需要下载文件,protobuf 可以在树莓派上多核编译)
sudo ./autogen.sh
执行配置文件:
sudo ./configure
编译:
sudo make -j2
编译完成后检查:
sudo make check
安装:
sudo make install
下载并安装Tensorflow和Opencv(省略:分两个版本对比)
下载谷歌model:https://github.com/tensorflow/models
cd models/research/ 执行
protoc object_detection/protos/*.proto --python_out=.
然后执行(slim 做特征提取)
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
由于没有在配置文件里设置,所以每次打开终端都要重新设置这个环境变量,并且必须在 research 文件夹下执行。
检测 module 环境是否搭建完成:在终端进入 research 文件夹,执行
python object_detection/builders/model_builder_test.py
如果测试出现 ImportError: No module named nets 错误,是因为之前的 export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim 没有执行
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import time
from collections import defaultdict
from io import StringIO
#from matplotlib import pyplot as plt
from PIL import Image
import cv2
###############################################################################
# Open video capture device index 0; the detection loop reads its frames from `cap`.
cap = cv2.VideoCapture(0)
##############################################################################
# This is needed since the notebook is stored in the object_detection folder.
# Appending ".." puts the parent directory on the module search path.
sys.path.append("..")
# ## Object detection imports
# Here are the imports from the object detection module.
# In[3]:
from utils import label_map_util
from utils import visualization_utils as vis_util
# # Model preparation
# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.
# In[4]:
# What model to use. The download step below is disabled, so the archive
# MODEL_NAME + '.tar.gz' must already exist in the current working directory.
MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'
#MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
############################################################################
#DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
###########################################################################
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
# Label map file used to add the correct label to each box.
#############################################################################
#PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
PATH_TO_LABELS = os.path.join('/home/pi/models-master/research/object_detection/data', 'mscoco_label_map.pbtxt')
NUM_CLASSES = 90
# time.clock() was removed in Python 3.8 (and reported CPU time, not elapsed
# time, on Linux); time.time() gives wall-clock seconds on every version.
start = time.time()
# ## Extract Model
# In[5]:
##############################################################################
#opener = urllib.request.URLopener()
#opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
###############################################################################
# Extract only the frozen graph from the archive into the working directory.
# The context manager closes the archive even if extraction fails.
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        file_name = os.path.basename(member.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(member, os.getcwd())
end = time.time()
print('Load the model: %s Seconds'%(end-start))
# ## Load a (frozen) Tensorflow model into memory.
# In[6]:
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # Read the serialized GraphDef bytes from the frozen .pb file.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as pb_file:
        serialized_graph = pb_file.read()
    od_graph_def.ParseFromString(serialized_graph)
    # name='' imports the nodes without a prefix, so tensors keep their
    # original names (e.g. 'image_tensor:0').
    tf.import_graph_def(od_graph_def, name='')
# ## Loading label map
# Label maps map indices to category names, so that when the network predicts
# `5` we know that this corresponds to `airplane`. The object_detection
# utility functions are used here, but anything that returns a dictionary
# mapping integers to appropriate string labels would be fine.
# In[7]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# ## Helper code
# In[8]:
def load_image_into_numpy_array(image):
    """Convert an image object into a (height, width, 3) uint8 numpy array.

    ``image`` must expose ``size`` as ``(width, height)`` and ``getdata()``
    returning the pixel sequence; the reshape below requires exactly three
    values per pixel.
    """
    width, height = image.size
    pixels = np.array(image.getdata())
    pixels = pixels.reshape((height, width, 3))
    return pixels.astype(np.uint8)
# # Detection
# In[9]:
# NOTE(review): the comments originally here described two test images
# (image1.jpg, image2.jpg) and a TEST_IMAGE_PATHS list. Neither appears in the
# visible script, which reads its frames from the camera instead.
# Size, in inches, of the output images.
# NOTE(review): IMAGE_SIZE is not referenced anywhere in the visible code.
IMAGE_SIZE = (12, 8)
# In[10]:
# Run detection on camera frames until 'q' is pressed or a frame cannot be read.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # The tensor handles do not change between frames, so look them up
        # once instead of on every iteration.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
        try:
            while True:
                # time.clock() was removed in Python 3.8; time.time() measures
                # elapsed wall-clock seconds on every version.
                start = time.time()
                ret, image_np = cap.read()
                # A failed read returns ret=False and no frame; stop instead of
                # crashing inside np.expand_dims / sess.run.
                if not ret or image_np is None:
                    print('Failed to read a frame from the camera, stopping.')
                    break
                # NOTE(review): OpenCV delivers BGR frames; confirm whether the
                # model expects RGB input and a cv2.cvtColor conversion is wanted.
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection (drawn onto image_np).
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)
                end = time.time()
                # %-formatting prints the same text on Python 2 and 3 (the
                # original print(a, b) form prints a tuple on Python 2).
                print('One frame detect take time: %s' % (end - start))
                cv2.imshow('object detection', cv2.resize(image_np, (800,600)))
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the camera and close the preview window.
            cap.release()
            cv2.destroyAllWindows()
#protoc object_detection/protos/*.proto --python_out=.
#export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
其中
MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09' 和 MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
可通过注释更换模型(下载模型后放在research\object_detection路径下即可) ssdlite_mobilenet_v2_coco_2018_05_09的速度更快,精度更高
在Tensorflow1.4和Opencv 3.3.1为
更换为最新版Tensorflow1.9和Opencv3.4后使用轻量级mobilenet_v2_coco最快识别速度达到 3.8S
import cv2
import os
def draw_rectangle(img, rect):
    """Draw the outline of ``rect`` given as (x, y, w, h) onto ``img``.

    The outline uses colour (0, 255, 0) and a thickness of 2.
    """
    left, top, width, height = rect
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)
# Draw text on the given image, anchored at the passed (x, y) coordinates.
def draw_text(img, text, x, y):
    """Write ``text`` onto ``img`` at (x, y) with the plain Hershey font.

    Uses font scale 1.5, colour (0, 255, 0) and thickness 2.
    """
    anchor = (x, y)
    cv2.putText(img, text, anchor, cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2)
def detect_face(img):
    """Detect a face in a BGR image and return its grayscale crop.

    Returns a ``(face, rect)`` pair: ``face`` is the grayscale sub-image of the
    first detection and ``rect`` is that detection as ``(x, y, w, h)``.
    Returns ``(None, None)`` when nothing is detected.
    """
    # Convert to grayscale before running the cascade detector.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Load the OpenCV face detector. The LBP cascade is used here because it is
    # fast; the Haar cascade is a more accurate but slower alternative:
    #face_cascade = cv2.CascadeClassifier('opencv-files/haarcascade_frontalface_alt.xml')
    cascadePath = 'opencv-files/lbpcascade_frontalface.xml'
    face_cascade = cv2.CascadeClassifier(cascadePath)
    # Multi-scale detection (some faces may be closer to the camera than
    # others); the result is a list of face rectangles.
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
    print(len(faces))
    # No detection: signal it with (None, None).
    if len(faces) == 0:
        return None, None
    # Under the assumption that there will be only one face, use the first one.
    (x, y, w, h) = faces[0]
    # Rows are indexed by y/height and columns by x/width. The original
    # slice had w and h swapped, which only works for square detections.
    return gray[y:y+h, x:x+w], faces[0]
# Detect a face in "1.jpg" and show the image with the detection outlined.
test_img = cv2.imread("1.jpg")
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; stop with a clear message instead of failing on .copy().
if test_img is None:
    raise SystemExit('Could not read image "1.jpg"')
img = test_img.copy()
face, rect = detect_face(img)
# detect_face returns (None, None) when no face is found; only draw a
# rectangle when there is one.
if rect is not None:
    draw_rectangle(img, rect)
else:
    print("No face detected")
cv2.imshow("face", cv2.resize(img, (400, 500)))
cv2.waitKey(300)
cv2.destroyAllWindows()
使用了 OpenCV 的级联分类器进行人脸的检测(上面的代码加载的是速度较快的 LBP 模型 lbpcascade_frontalface.xml,也可以换成更准确但较慢的 haarcascade)
#import OpenCV module
import cv2
#import os module for reading training data directories and paths
import os
#import numpy to convert python lists to numpy arrays as
#it is needed by OpenCV face recognizers
import numpy as np
# Face detection helper built on OpenCV.
def detect_face(img):
    """Detect a face in a BGR image and return its grayscale crop.

    Returns a ``(face, rect)`` pair: ``face`` is the grayscale sub-image of the
    first detection and ``rect`` is that detection as ``(x, y, w, h)``.
    Returns ``(None, None)`` when nothing is detected.
    """
    # Convert to grayscale before running the cascade detector.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Load the OpenCV face detector. The fast LBP cascade is used here; the
    # Haar cascade is a more accurate but slower alternative.
    face_cascade = cv2.CascadeClassifier('opencv-files/lbpcascade_frontalface.xml')
    # Multi-scale detection (some faces may be closer to the camera than
    # others); the result is a list of face rectangles.
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
    # No detection: signal it with (None, None).
    if len(faces) == 0:
        return None, None
    # Assume there is only one face and use the first detection.
    (x, y, w, h) = faces[0]
    # Rows are indexed by y/height and columns by x/width. The original
    # slice had w and h swapped, which only works for square detections.
    return gray[y:y+h, x:x+w], faces[0]
# Read every subject's training images, detect the face in each image and
# return two lists of equal length: the face crops and their labels.
def prepare_training_data(data_folder_path):
    """Collect face crops and integer labels from a training-data folder.

    ``data_folder_path`` is expected to contain one directory per subject,
    named ``s<label>`` (for example ``training-data/s1``). Every image inside
    a subject directory is shown briefly in a window and passed to
    ``detect_face``; images without a detected face are ignored.

    Returns ``(faces, labels)`` where ``faces[i]`` is the face crop returned by
    ``detect_face`` and ``labels[i]`` is the integer label of its subject.
    """
    # ------STEP-1--------
    # One list for the faces of all subjects, one for the matching labels.
    faces = []
    labels = []
    # Walk the entries of the data folder (one directory per subject).
    for dir_name in os.listdir(data_folder_path):
        # Subject directories start with the letter 's'; ignore anything else.
        if not dir_name.startswith("s"):
            continue
        # ------STEP-2--------
        # Directory name format is s<label>, so removing the letter 's'
        # leaves the label. Entries whose remainder is not a number are
        # unrelated to the training data and are skipped instead of crashing.
        try:
            label = int(dir_name.replace("s", ""))
        except ValueError:
            continue
        # Path of the directory holding this subject's images,
        # e.g. "training-data/s1".
        subject_dir_path = os.path.join(data_folder_path, dir_name)
        # A plain file that happens to match the naming scheme is not a subject.
        if not os.path.isdir(subject_dir_path):
            continue
        # ------STEP-3--------
        # Read each image, detect the face and add it to the list of faces.
        for image_name in os.listdir(subject_dir_path):
            # Ignore system files such as .DS_Store.
            if image_name.startswith("."):
                continue
            # e.g. training-data/s1/1.pgm
            image_path = os.path.join(subject_dir_path, image_name)
            image = cv2.imread(image_path)
            # cv2.imread returns None for files it cannot decode; skip them
            # rather than failing in cv2.imshow.
            if image is None:
                continue
            # Show the image that is currently being processed.
            cv2.imshow("Training on image...", image)
            cv2.waitKey(100)
            face, rect = detect_face(image)
            # ------STEP-4--------
            # Images in which no face was detected are ignored.
            if face is not None:
                faces.append(face)
                labels.append(label)
    cv2.destroyAllWindows()
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    return faces, labels
# Prepare the training data first: two lists of the same size, one holding
# all the faces and one holding the label of each face.
print("Preparing data...")
faces, labels = prepare_training_data("training-data")
print("Data prepared")
# Print the total number of faces and labels.
print("Total faces: ", len(faces))
print("Total labels: ", len(labels))
print("Preparing train....")
# Train an LBPH face recognizer on the collected samples and save the
# trained model to trainer.yml.
face_recognizer = cv2.face.LBPHFaceRecognizer_create()
label_array = np.array(labels)
face_recognizer.train(faces, label_array)
face_recognizer.write('trainer.yml')
print("faces trained. Exiting Program")
训练完成后将会出现一个trainer.yml文件(训练这一步可以在PC上进行)
import cv2
import os
import time
# Load the trained LBPH model and recognise faces in the camera stream until
# ESC is pressed or a frame cannot be read.
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer.yml')
# NOTE(review): the other scripts load this cascade from 'opencv-files/';
# confirm which location is correct for the deployed file layout.
cascadePath = "lbpcascade_frontalface.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)
font = cv2.FONT_HERSHEY_SIMPLEX
idnum = 0
# names[label] is the display name for a predicted label. Index 0 is an empty
# placeholder, presumably because training labels start at 1 (s1, s2, ...).
names = ['', 'Bob','shen','taobi']
cam = cv2.VideoCapture(0)
# Capture properties 3 and 4 are the frame width and height; detections
# smaller than 10% of the frame in either dimension are ignored.
minW = 0.1*cam.get(3)
minH = 0.1*cam.get(4)
try:
    while True:
        ret, img = cam.read()
        # A failed read returns ret=False and no frame; stop instead of
        # crashing inside cv2.cvtColor.
        if not ret or img is None:
            print("Failed to read a frame from the camera, stopping.")
            break
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = faceCascade.detectMultiScale(
            gray,
            scaleFactor=1.2,
            minNeighbors=5,
            minSize=(int(minW), int(minH))
        )
        for (x, y, w, h) in faces:
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
            idnum, confidence = recognizer.predict(gray[y:y+h, x:x+w])
            # Values below 100 are treated as a match. The label is also
            # bounds-checked so a model trained with more subjects than
            # `names` lists cannot raise IndexError.
            if confidence < 100 and 0 <= idnum < len(names):
                idnum = names[idnum]
            else:
                idnum = "unknown"
            confidence = "{0}%".format(round(100 - confidence))
            cv2.putText(img, str(idnum), (x+5, y-5), font, 1, (0, 0, 255), 1)
            cv2.putText(img, str(confidence), (x+5, y+h-5), font, 1, (0, 0, 0), 1)
        cv2.imshow('camera', img)
        # Mask to the low byte so the key code compares reliably; 27 is ESC.
        k = cv2.waitKey(10) & 0xFF
        if k == 27:
            break
finally:
    # Always free the camera and close the preview window.
    cam.release()
    cv2.destroyAllWindows()
# def draw_rectangle(img, rect):
# (x, y, w, h) = rect
# cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
# #function to draw text on give image starting from
# #passed (x, y) coordinates.
# def draw_text(img, text, x, y):
# cv2.putText(img, text, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2)
# def detect_face(img):
# #convert the test image to gray image as opencv face detector expects gray images
# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# cascadePath = 'opencv-files/lbpcascade_frontalface.xml'
# face_cascade = cv2.CascadeClassifier(cascadePath)
# #load OpenCV face detector, I am using LBP which is fast
# #there is also a more accurate but slow Haar classifier
# #face_cascade = cv2.CascadeClassifier('opencv-files/haarcascade_frontalface_alt.xml')
# #let's detect multiscale (some images may be closer to camera than others) images
# #result is a list of faces
# faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5);
# #if no faces are detected then return original img
# print(len(faces))
# if (len(faces) == 0):
# return None, None
# #under the assumption that there will be only one face,
# #extract the face area
# (x, y, w, h) = faces[0]
# #return only the face part of the image
# return gray[y:y+w, x:x+h], faces[0]
# face_recognizer = cv2.face.LBPHFaceRecognizer_create()
# face_recognizer.read('trainer.yml')
# names = ['', 'Bob','xxx','Taotaotao']
# print("Predicting images...")
# #load test images
# test_img = cv2.imread("1.jpg")
# #perform a prediction
# img = test_img.copy()
# start=time.clock()
# face, rect = detect_face(img)
# label, confidence = face_recognizer.predict(face)
# label_text = names[label]
# end=time.clock()
# draw_rectangle(img, rect)
# draw_text(img, label_text, rect[0], rect[1]-5)
# print("Prediction complete time %s"%(end-start))
# #display both images
# cv2.imshow("0000", cv2.resize(img, (400, 500)))
# cv2.waitKey(3600)
# cv2.destroyAllWindows()
上面为识别视频流(摄像头)注释部分为识别图片
文件的构成为
复制整个文件到树莓派上
python test.py
查看结果
在树莓派3B+上仅使用Opencv3.4检测并识别人脸可以达到 0.2S
Opencv人脸识别:http://www.cnblogs.com/zhuifeng-mayi/p/9171383.html
谷歌Object Detection物体识别:https://blog.csdn.net/qq_36148847/article/details/79306762