

一.使用谷歌Object detection实现物体识别

1.安装配置谷歌Object detection

wget https://github.com/google/protobuf/archive/v3.4.1.tar.gz -O protobuf3.4.1.tar.gz
解压:sudo tar xvzf protobuf3.4.1.tar.gz
sudo apt-get install autoconf
sudo apt-get install automake
sudo apt-get install libtool
进入解压后的目录: cd protobuf-3.4.1
生成配置文件(编译时需保证网络连接,因为需要下载文件;protobuf 可以在树莓派上多核编译)
sudo ./autogen.sh
执行配置脚本: sudo ./configure
编译: sudo make -j2
编译完成后检查: sudo make check
安装: sudo make install


cd models/research/ 执行
protoc object_detection/protos/*.proto --python_out=.
然后执行(slim 做特征提取): export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
之后运行下面的识别脚本:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import time

from collections import defaultdict
from io import StringIO
#from matplotlib import pyplot as plt
from PIL import Image

import cv2

# Open camera device index 0; the detection loop further down reads its
# frames through `cap.read()`.
cap = cv2.VideoCapture(0)

# This is needed since the notebook is stored in the object_detection folder.

# ## Object detection imports
# Here are the imports from the object detection module.

# In[3]:

from utils import label_map_util

from utils import visualization_utils as vis_util

# # Model preparation 

# ## Variables
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.    
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

# In[4]:

# Which model to use. Swap the commented line to switch models.
MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'
#MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
# Archive that holds the frozen graph. The download step is disabled, so the
# archive must already be present in the current working directory.
MODEL_FILE = MODEL_NAME + '.tar.gz'
#DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
#PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
PATH_TO_LABELS = os.path.join('/home/pi/models-master/research/object_detection/data', 'mscoco_label_map.pbtxt')

# time.clock() was removed in Python 3.8; time.time() is available everywhere
# and reports the wall-clock duration that the message below is about.
start = time.time()
# ## Download Model

# In[5]:
#opener = urllib.request.URLopener()
# Extract only the frozen inference graph from the archive into the working
# directory. The `with` block closes the archive even if extraction fails.
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        member_name = os.path.basename(member.name)
        if 'frozen_inference_graph.pb' in member_name:
            tar_file.extract(member, os.getcwd())

end = time.time()
print('Load the model: %s Seconds'%(end-start))
# ## Load a (frozen) Tensorflow model into memory.

# In[6]:

# Build a dedicated graph and load the frozen model into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    # The raw bytes must be parsed into the GraphDef before it is imported;
    # otherwise an empty graph is imported and none of the detection tensors
    # can be looked up later.
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.    Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

# In[7]:

# Upper bound on the class ids kept from the label map. 90 is the value the
# TensorFlow object detection tutorial uses for the COCO label map.
NUM_CLASSES = 90

# Load the label map and build the id -> category lookup used when labelling
# detections.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# ## Helper code

# In[8]:

def load_image_into_numpy_array(image):
    """Convert an image object into a ``(height, width, 3)`` uint8 array.

    ``image`` must expose ``size`` as a ``(width, height)`` pair and a
    ``getdata()`` method whose result holds one 3-component pixel per
    position (``width * height`` pixels in total).
    """
    width, height = image.size
    pixels = np.array(image.getdata())
    shaped = pixels.reshape((height, width, 3))
    return shaped.astype(np.uint8)

# # Detection

# In[9]:

# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

# In[10]:

# Run detection on camera frames until 'q' is pressed or the camera stops
# delivering frames.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # The tensor handles do not change between frames, so look them up once.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
        try:
            while True:
                # time.clock() was removed in Python 3.8; time.time() gives
                # the wall-clock time spent on one frame.
                start = time.time()
                ret, image_np = cap.read()
                if not ret:
                    # No frame available (camera missing or disconnected).
                    print('Failed to read a frame from the camera')
                    break
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                        [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
                        feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection: draw the boxes
                # and class labels onto the frame before it is shown.
                vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)
                end = time.time()
                print('One frame detect take time: %s' % (end - start))
                cv2.imshow('object detection', cv2.resize(image_np, (800,600)))
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            # Always give the camera back and close the preview window.
            cap.release()
            cv2.destroyAllWindows()
#protoc object_detection/protos/*.proto --python_out=. 
#export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim 

MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09' 和 MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
可通过注释切换模型(下载模型后放在 research/object_detection 路径下即可)。ssdlite_mobilenet_v2_coco_2018_05_09 的速度更快,精度更高。
在Tensorflow1.4和Opencv 3.3.1为
更换为最新版 Tensorflow 1.9 和 Opencv 3.4 后,使用轻量级 mobilenet_v2_coco,最快识别速度达到每帧 3.8 秒



import cv2
import os
def draw_rectangle(img, rect):
    """Draw the outline of ``rect`` on ``img`` in colour (0, 255, 0), 2 px thick.

    ``rect`` is an ``(x, y, w, h)`` sequence: top-left corner plus width and
    height.
    """
    left, top, width, height = rect
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)
# Write a text label onto an image, anchored at the given (x, y) coordinates.
def draw_text(img, text, x, y):
    """Render ``text`` on ``img`` with ``(x, y)`` as the text origin.

    Uses the plain Hershey font at scale 1.5, colour (0, 255, 0) and
    thickness 2.
    """
    origin = (x, y)
    font_face = cv2.FONT_HERSHEY_PLAIN
    cv2.putText(img, text, origin, font_face, 1.5, (0, 255, 0), 2)

def detect_face(img):
    """Detect a face in ``img`` and return its grayscale crop.

    Args:
        img: Colour image convertible with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        ``(face, rect)`` where ``rect`` is the first detection as
        ``(x, y, w, h)`` and ``face`` is that region cut out of the grayscale
        image, or ``(None, None)`` when no face is detected.
    """
    # The OpenCV face detector expects a grayscale image.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # LBP cascade is used because it is fast; the Haar cascade below is more
    # accurate but slower.
    cascadePath = 'opencv-files/lbpcascade_frontalface.xml'
    face_cascade = cv2.CascadeClassifier(cascadePath)
    #face_cascade = cv2.CascadeClassifier('opencv-files/haarcascade_frontalface_alt.xml')

    # Multi-scale detection, since faces may be closer to or further from the
    # camera; the result is a collection of (x, y, w, h) rectangles.
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
    # len() is used on purpose: the truth value of a NumPy array is ambiguous.
    if len(faces) == 0:
        return None, None
    # Only the first detection is used (assumes one face per image).
    (x, y, w, h) = faces[0]
    # Rows are indexed by y and span the height, columns by x and span the
    # width; the previous slice had w and h swapped.
    return gray[y:y+h, x:x+w], faces[0]

# Detect a face in a sample picture and display the result.
test_img = cv2.imread("1.jpg")
if test_img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise IOError("Could not read image: 1.jpg")
img = test_img.copy()
face, rect = detect_face(img)
# detect_face returns (None, None) when nothing is found; only draw a box
# when there is one.
if rect is not None:
    draw_rectangle(img, rect)
cv2.imshow("face", cv2.resize(img, (400, 500)))
# The window is only rendered while waitKey runs; block until a key is
# pressed, then close it.
cv2.waitKey(0)
cv2.destroyAllWindows()



#import OpenCV module
import cv2
#import os module for reading training data directories and paths
import os
#import numpy to convert python lists to numpy arrays as 
#it is needed by OpenCV face recognizers
import numpy as np

def detect_face(img):
    """Detect a face in ``img`` and return its grayscale crop.

    Args:
        img: Colour image convertible with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        ``(face, rect)`` where ``rect`` is the first detection as
        ``(x, y, w, h)`` and ``face`` is that region cut out of the grayscale
        image, or ``(None, None)`` when no face is detected.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    face_cascade = cv2.CascadeClassifier('opencv-files/lbpcascade_frontalface.xml')

    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
    # len() is used on purpose: the truth value of a NumPy array is ambiguous.
    if len(faces) == 0:
        return None, None
    (x, y, w, h) = faces[0]
    # Rows are indexed by y and span the height, columns by x and span the
    # width; the previous slice had w and h swapped.
    return gray[y:y+h, x:x+w], faces[0]
# Collect the face crop of every training image plus a parallel list of labels.
def prepare_training_data(data_folder_path):
    """Build the training set from a folder of per-subject directories.

    Subject directories are named ``s<label>`` (for example
    ``training-data/s1``); the integer after the ``s`` is the label given to
    every image inside that directory.

    Args:
        data_folder_path: Folder that contains the ``s<label>`` directories.

    Returns:
        ``(faces, labels)``: two lists of equal length, where ``faces[i]`` is
        a grayscale face crop and ``labels[i]`` is its integer label. Images
        that cannot be read or in which no face is detected are left out.
    """
    faces = []
    labels = []
    for dir_name in os.listdir(data_folder_path):
        # Only directories following the "s<label>" convention are subjects.
        if not dir_name.startswith("s"):
            continue
        # Directory name format = s<label>
        label = int(dir_name.replace("s", ""))
        # Sample subject_dir_path = "training-data/s1"
        subject_dir_path = data_folder_path + "/" + dir_name
        for image_name in os.listdir(subject_dir_path):
            # Skip hidden/system files such as .DS_Store.
            if image_name.startswith("."):
                continue
            # Sample image path = training-data/s1/1.pgm
            image_path = subject_dir_path + "/" + image_name

            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None for files it cannot decode.
                continue
            cv2.imshow("Training on image...", image)
            # imshow only renders while waitKey pumps GUI events.
            cv2.waitKey(1)
            face, _ = detect_face(image)
            if face is not None:
                faces.append(face)
                labels.append(label)
    cv2.destroyAllWindows()
    return faces, labels
# Build the training set, train an LBPH recognizer and save it to disk.
print("Preparing data...")
faces, labels = prepare_training_data("training-data")
print("Data prepared")

print("Total faces: ", len(faces))
print("Total labels: ", len(labels))
if not faces:
    # Training on an empty set fails inside OpenCV with an obscure error;
    # stop early with a clear message instead.
    raise SystemExit("No faces found in training-data; nothing to train on")
print("Preparing train....")
face_recognizer = cv2.face.LBPHFaceRecognizer_create()
face_recognizer.train(faces, np.array(labels))
# Persist the trained model; without this the training result is lost as
# soon as the program exits.
face_recognizer.write("trainer.yml")
print("faces trained. Exiting Program")



import cv2
import os
import time
# Live face recognition: detect faces in camera frames, predict who they are
# with an LBPH recognizer and overlay the name and confidence.
recognizer = cv2.face.LBPHFaceRecognizer_create()
# An untrained recognizer cannot predict; load a model previously saved with
# the recognizer's write() method.
recognizer.read("trainer.yml")
cascadePath = "lbpcascade_frontalface.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)
# Font used for the name/confidence overlay.
font = cv2.FONT_HERSHEY_SIMPLEX

idnum = 0

# Display names indexed by the numeric label returned by the recognizer.
names = ['', 'Bob','shen','taobi']

cam = cv2.VideoCapture(0)
# Capture properties 3 and 4 are the frame width and height; faces smaller
# than 10% of the frame in either dimension are ignored.
minW = 0.1*cam.get(3)
minH = 0.1*cam.get(4)

try:
    while True:
        ret, img = cam.read()
        if not ret:
            # No frame available (camera missing or disconnected).
            break
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        faces = faceCascade.detectMultiScale(
            gray,
            scaleFactor=1.2,
            minNeighbors=5,
            minSize=(int(minW), int(minH))
        )

        for (x, y, w, h) in faces:
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
            idnum, confidence = recognizer.predict(gray[y:y+h, x:x+w])

            # For LBPH a lower value means a closer match. Labels outside
            # the names table are treated as unknown rather than crashing.
            if confidence < 100 and 0 <= idnum < len(names):
                idnum = names[idnum]
            else:
                idnum = "unknown"
            # Format once, after the branch: formatting twice would subtract
            # a string from a number.
            confidence = "{0}%".format(round(100 - confidence))

            cv2.putText(img, str(idnum), (x+5, y-5), font, 1, (0, 0, 255), 1)
            cv2.putText(img, str(confidence), (x+5, y+h-5), font, 1, (0, 0, 0), 1)

        cv2.imshow('camera', img)
        # Mask to the low byte so the key code compares reliably; 27 is ESC.
        k = cv2.waitKey(10) & 0xFF
        if k == 27:
            break
finally:
    # Always give the camera back and close the preview window.
    cam.release()
    cv2.destroyAllWindows()

python test.py
在树莓派 3B+ 上仅使用 Opencv 3.4 检测并识别人脸,耗时可以达到 0.2 秒


