realsense+dlib目标检测

realsense+dlib目标检测

      • 1.环境
      • 2.训练自己的模型
      • 3.代码

1.环境

  • 双系统 win7 + ubuntu18.04
  • ROS Melodic安装:http://wiki.ros.org/cn/melodic/Installation/Ubuntu
    注:Melodic可以安装在 Ubuntu Artful (17.10),Bionic (18.04 LTS),Debian Stretch 和其他一些平台上。
    Kinetic在Ubuntu Wily(15.10)和Ubuntu Xenial (16.04 LTS)可用。
    Indigo在Ubuntu Trusty (14.04 LTS) 和其他平台可用。
  • libreasense(Intel® RealSense™ SDK 2.0)安装: https://github.com/IntelRealSense/librealsense/blob/master/doc/distribution_linux.md
  • opencv:https://docs.opencv.org/master/d2/de6/tutorial_py_setup_in_ubuntu.html
  • dlib:https://github.com/davisking/dlib

2.训练自己的模型

  1. imglab制作数据集:https://github.com/davisking/dlib/tree/master/tools/imglab
  2. 训练及测试:https://github.com/davisking/dlib/blob/master/python_examples/train_object_detector.py
  3. 现有模型:https://github.com/davisking/dlib-models

3.代码

#!/usr/bin/python
import pyrealsense2 as rs
import numpy as np
import cv2
import time
import darknet as dn
import dlib
import rospy
from cv_bridge import CvBridge
import sensor_msgs
from object_msgs.msg import ObjectsInBoxes,ObjectInBox
import geometry_msgs
 
# Declare pointcloud object, for calculating pointclouds and texture mappings
pc = rs.pointcloud()
# We want the points object to be persistent so we can display the last cloud when a frame drops
points = rs.points()

 
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 15)
 
# Start streaming
pipe_profile = pipeline.start(config)
 
# Create an align object
# rs.align allows us to perform alignment of depth frames to others frames
# The "align_to" is the stream type to which we plan to align depth frames.
align_to = rs.stream.color
align = rs.align(align_to)

rospy.init_node("rs2_object_detection")
bridge = CvBridge()
color_pub=rospy.Publisher("rs2/color_image/object_detection",sensor_msgs.msg.Image,queue_size=1)
boxes_pub=rospy.Publisher("rs2/objects_in_boxs",ObjectsInBoxes,queue_size=1)
point_pub=rospy.Publisher("rs2/point",geometry_msgs.msg.Point,queue_size=1)
rate = rospy.Rate(10.0)
while not rospy.is_shutdown():
    frames = pipeline.wait_for_frames()
    aligned_frames = align.process(frames)
    depth_frame = aligned_frames.get_depth_frame()
    color_frame = aligned_frames.get_color_frame()
    img_color = np.asanyarray(color_frame.get_data())
    img_depth = np.asanyarray(depth_frame.get_data())

    # Intrinsics & Extrinsics
    depth_intrin = depth_frame.profile.as_video_stream_profile().intrinsics
    color_intrin = color_frame.profile.as_video_stream_profile().intrinsics
    depth_to_color_extrin = depth_frame.profile.get_extrinsics_to(color_frame.profile)

    # Depth scale - units of the values inside a depth frame, i.e how to convert the value to units of 1 meter
    depth_sensor = pipe_profile.get_device().first_depth_sensor()
    depth_scale = depth_sensor.get_depth_scale()
	
	"""
    # Map depth to color
    depth_pixel = [240, 320]   # Random pixel
    depth_point = rs.rs2_deproject_pixel_to_point(depth_intrin, depth_pixel, depth_scale)

    color_point = rs.rs2_transform_point_to_point(depth_to_color_extrin, depth_point)
    color_pixel = rs.rs2_project_point_to_pixel(color_intrin, color_point)
    print("depth scale:",depth_scale)
    print('depth_point: ',depth_point)
    print('color_point: ',color_point)
    print('color_pixel: ',color_pixel)

    pc.map_to(color_frame)
    points = pc.calculate(depth_frame)
    vtx = np.asanyarray(points.get_vertices())
    tex = np.asanyarray(points.get_texture_coordinates())
    i = 640*320+240
    #print ('depth: ',[np.float(vtx[i][0]),np.float(vtx[i][1]),np.float(vtx[i][2])])

    cv2.circle(img_color, (320,240), 4, [255,0,255], thickness=-1)
    cv2.putText(img_color,"Dis:"+str(img_depth[320,240]), (40,40), cv2.FONT_HERSHEY_SIMPLEX, 1.2,[255,0,255])
    cv2.putText(img_color,"X:"+str(np.float(vtx[i][0])), (40,80), cv2.FONT_HERSHEY_SIMPLEX, 1.2,[255,0,255])
    cv2.putText(img_color,"Y:"+str(np.float(vtx[i][1])), (40,120), cv2.FONT_HERSHEY_SIMPLEX, 1.2,[255,0,255])
    cv2.putText(img_color,"Z:"+str(np.float(vtx[i][2])), (40,160), cv2.FONT_HERSHEY_SIMPLEX, 1.2,[255,0,255])
    cv2.imshow('depth_frame',img_color)
    key = cv2.waitKey(1)
    if key & 0xFF == ord('q'):
        break
    """

    '''
    npy_vtx = np.zeros((len(vtx), 3), float)
    print('len: ',len(vtx))
    for i in range(len(vtx)):
        npy_vtx[i][0] = np.float(vtx[i][0])
        npy_vtx[i][1] = np.float(vtx[i][1])
        npy_vtx[i][2] = np.float(vtx[i][2])
    npy_tex = np.zeros((len(tex), 3), float)
    for i in range(len(tex)):
        npy_tex[i][0] = np.float(tex[i][0])
        npy_tex[i][1] = np.float(tex[i][1])
    '''


    """ yolov3 test
    cv2.imwrite("color_test.jpg",img_color)
    net = dn.load_net("/home/zouchengxin/darknet/cfg/yolov3.cfg", "/home/zouchengxin/darknet/yolov3.weights", 0)
    meta = dn.load_meta("/home/zouchengxin/darknet/cfg/coco.data")
    #r = detect(net, meta, "/home/zouchengxin/darknet/data/dog.jpg")
    rs = dn.detect(net, meta, "color_test.jpg")
    print(rs)
    for r in rs:
        x1 = int(r[2][0]-r[2][2]/2)
        y1 = int(r[2][1]-r[2][3]/2)
        x2 = int(r[2][0]+r[2][2]/2)
        y2 = int(r[2][1]+r[2][3]/2)
        label = r[0]
        score = r[1]
        cv2.putText(img_color,label+":"+str(score), (x1,y1-10), cv2.FONT_HERSHEY_SIMPLEX, 1.0,[0,255,0],1)
        cv2.rectangle(img_color,(x1,y1),(x2,y2),(0,255,0),2)
    cv2.imshow("color_test",img_color)
    key = cv2.waitKey(0)
    """

    detector = dlib.simple_object_detector("/home/zouchengxin/catkin_ws/src/realsense/realsense2_camera/nodes/detector.svm")
    #img = dlib.load_rgb_image("/home/zouchengxin/dog_train/3.jpg")
    dets = detector(img_color)
    boxmsg=ObjectsInBoxes()
    pointmsg=geometry_msgs.msg.Point()
    for k, d in enumerate(dets):
        #print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(k, d.left(), d.top(), d.right(), d.bottom()))
        x1=d.left()
        y1=d.top()
        x2=d.right()
        y2=d.bottom()
        cx=int((x1+x2)/2)
        cy=int((y1+y2)/2)
        dis=img_depth[cx,cy]*depth_scale
        cv2.putText(img_color,"glass", (x1,y1-60), cv2.FONT_HERSHEY_SIMPLEX, 0.5,[0,255,0],1)
        cv2.putText(img_color,"distance:"+str(dis)+"m", (x1,y1-30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,[0,255,0],1)
        cv2.circle(img_color, (cx,cy), 3, [0,255,0], thickness=-1)
        cv2.rectangle(img_color,(x1,y1),(x2,y2),(0,255,0),2)
        depth_point = rs.rs2_deproject_pixel_to_point(depth_intrin, [cx,cy], dis)
        objmsg=ObjectInBox()
        objmsg.object.object_name="glass"
        objmsg.roi.x_offset=x1
        objmsg.roi.y_offset=y1
        objmsg.roi.width=int(x2-x1)
        objmsg.roi.height=int(y2-y1)
        boxmsg.objects_vector.append(objmsg)
        pointmsg.x=depth_point[0]
        pointmsg.y=depth_point[1]
        pointmsg.z=depth_point[2]
        print("(x=%f,y=%f,z=%f)" % (depth_point[0],depth_point[1],depth_point[2]))
    cv2.imshow('color_frame',img_color)
    key = cv2.waitKey(1)
    if key & 0xFF == ord('q'):
        break
    imgmsg=bridge.cv2_to_imgmsg(img_color, encoding="bgr8")
    color_pub.publish(imgmsg)
    boxes_pub.publish(boxmsg)
    point_pub.publish(pointmsg)
    rate.sleep()
    
pipeline.stop()

'''
pc = rs.pointcloud()
frames = pipeline.wait_for_frames()
depth = frames.get_depth_frame()
color = frames.get_color_frame()
img_color = np.asanyarray(color_frame.get_data())
img_depth = np.asanyarray(depth_frame.get_data())
pc.map_to(color)
points = pc.calculate(depth)
vtx = np.asanyarray(points.get_vertices())
tex = np.asanyarray(points.get_texture_coordinates())
npy_vtx = np.zeros((len(vtx), 3), float)
for i in range(len(vtx)):
    npy_vtx[i][0] = np.float(vtx[i][0])
    npy_vtx[i][1] = np.float(vtx[i][1])
    npy_vtx[i][2] = np.float(vtx[i][2])
npy_tex = np.zeros((len(tex), 3), float)
for i in range(len(tex)):
    npy_tex[i][0] = np.float(tex[i][0])
    npy_tex[i][1] = np.float(tex[i][1])
'''

你可能感兴趣的:(机器学习)