Modifying AlphaPose to Run Multiple Camera Streams Simultaneously

The author only provides a solution for a single webcam, so I set out to support multiple cameras.

Two screenshots of the result: the first uses 6 different videos; for the second, since I had no spare test cameras, I simply opened the same camera 6 times.

I made the following changes:

1. Put all the stream addresses into a list, loop over every stream, collect the frames into one batch, and send that batch to the model. Nothing is wrapped up nicely here; you could also move the addresses into opt.py.

    url_dir = ['1', '2', '3', '4', '5', '6']   # placeholder stream addresses
    lushu = len(url_dir)                       # number of streams
    for n, url in enumerate(url_dir):
        locals()['stream_' + str(n)] = cv2.VideoCapture(url)
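
Storing the capture handles through locals() relies on a CPython implementation detail (writes to locals() inside a function are not guaranteed to persist), so if it misbehaves, a plain dict keyed by the stream index does the same job. A minimal sketch with the same placeholder addresses:

    # Sketch: keep one cv2.VideoCapture per stream in a dict instead of locals()
    streams = {n: cv2.VideoCapture(url) for n, url in enumerate(url_dir)}
    for n, cap in streams.items():
        assert cap.isOpened(), 'Cannot capture source {}'.format(n)
    grabbed, frame = streams[0].read()  # read one frame from the first stream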

2. At first I took the data produced by the author's DetectionLoader and fed it straight into DetectionProcessor, but only two streams (the first and the last) were ever recognized. That seemed strange, so I printed the list contents at every step and found that inside DetectionLoader, by the time its for loop finished each pass, the detection results for all streams had been pushed into self.Q; yet as soon as the loop ended, only one stream was left in Q, and every subsequent stage saw only that one stream. Stranger still, even with only one stream left, each recognition result covered two streams. The fix that finally worked was to merge DetectionLoader and DetectionProcessor into a single class, after which Q holds every stream, as in the loop below.

# per-stream loop inside the merged DetectionLoader (see the full class further down)
for k in range(len(orig_img)):
    boxes_k = boxes[dets[:,0]==k]
    if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
        if self.Q.full():
            time.sleep(2)
        self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
        continue
    inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
    pt1 = torch.zeros(boxes_k.size(0), 2)
    pt2 = torch.zeros(boxes_k.size(0), 2)
    if self.Q.full():
        time.sleep(2)

    inp = im_to_torch(cv2.cvtColor(orig_img[k], cv2.COLOR_BGR2RGB))
    inps, pt1, pt2 = crop_from_dets(inp, boxes_k, inps, pt1, pt2)
    self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:,0]==k], inps, pt1, pt2))

3. Considering the GPU's detection speed (I am using a Quadro P4000, whose performance sits between a GTX 1060 and a GTX 1070), I can only keep up by processing one frame out of every several:

if num_frames % 25 == 0:
    (grabbed, frame) = locals()['stream_' + str(n)].read()
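
Note that in the complete loader further down, every frame is read on every iteration and only every 25th is actually processed, which keeps a live stream's buffer drained. If you would rather skip the decoding work on dropped frames as well, OpenCV's grab() advances the stream without decoding; a minimal sketch (SKIP is an illustrative constant, not from the original code):

    SKIP = 25  # process one frame out of every SKIP; tune this to your GPU
    if num_frames % SKIP == 0:
        grabbed, frame = locals()['stream_' + str(n)].read()  # decode and process this frame
    else:
        grabbed = locals()['stream_' + str(n)].grab()  # advance the stream without decoding
        frame = None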

4. Finally, the results are split by stream according to the image name, and each stream is displayed in its own window:

pic_name = im_name.split('_')[0]
if boxes is None:
    if opt.save_img or opt.save_video or opt.vis:
        img = orig_img
        if opt.vis:
            # print('none')
            # print('im_name='+str(im_name))
            cv2.namedWindow('AlphaPose Demo_{}'.format(pic_name), cv2.WINDOW_NORMAL)
            cv2.imshow("AlphaPose Demo_{}".format(pic_name), img)
            cv2.waitKey(30)

5. In webcam_demo.py, only lines 49/50 need to be rewritten into a single line:

det_processor = DetectionLoader(data_loader, batchSize=args.detbatch).start()

and the unpacking order on line 79 changed to match the order your loader puts into the queue:

(orig_img, im_name, boxes, scores, inps, pt1, pt2) = det_processor.read()
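
For reference, here is a rough sketch of how the main loop in webcam_demo.py can consume this tuple and hand the results to the DataWriter defined below; the pose_model call and the names other than those in the tuple above follow the original demo only loosely and are illustrative:

    (orig_img, im_name, boxes, scores, inps, pt1, pt2) = det_processor.read()
    if boxes is None or boxes.nelement() == 0:
        writer.save(None, None, None, None, None, orig_img, im_name)
    else:
        hm = pose_model(inps.cuda())  # run the pose network on the cropped detections
        writer.save(boxes, scores, hm.cpu(), pt1, pt2, orig_img, im_name)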

Below is the complete dataloader_webcam.py (imports omitted; they are unchanged).

class WebcamLoader:
    def __init__(self, webcam, batchSize=1, queueSize=0):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        # self.stream = cv2.VideoCapture(webcam)
        # # self.stream.set(cv2.CAP_PROP_FPS,10)
        # assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False
        # initialize the queue used to store frames read from
        # the video file
        self.batchSize = batchSize
        self.Q = LifoQueue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        t= Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely
        num_frames = 0
        i = 0
        url_dir = ['1', '2', '3', '4', '5', '6']   # placeholder stream addresses; replace with your cameras
        lushu = len(url_dir)   # number of streams
        for n,url in enumerate(url_dir):
            locals()['stream_' + str(n)] = cv2.VideoCapture(url)
            assert locals()['stream_' + str(n)].isOpened(), 'Cannot capture source'

        while True:
            # otherwise, ensure the queue has room in it
            if not self.Q.full():
                img = []
                orig_img = []
                im_name = []
                im_dim_list = []
                
                num_frames += 1
                for n,url in enumerate(url_dir):
                    
                    for k in range(self.batchSize):
                        (grabbed, frame) = locals()['stream_' + str(n)].read()
                        
                        if num_frames % 25 == 0:
                            # if the `grabbed` boolean is `False`, then we have
                            # reached the end of the video file
                            if not grabbed:
                                self.stop()
                                return
                            inp_dim = int(opt.inp_dim)
                            img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                        
                            img.append(img_k)
                            orig_img.append(orig_img_k)
                            im_name.append('{}_{}.jpg'.format(n,i))
                            im_dim_list.append(im_dim_list_k)

                if len(img) !=0:
                    # print(len(img))
                    with torch.no_grad():
                        # Human Detection
                        img = torch.cat(img)
                        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)

                        self.Q.put((img, orig_img, im_name, im_dim_list))
                        i = i+1
                
                        # print(self.Q.get()[2])

            else:
                with self.Q.mutex:
                    self.Q.queue.clear()

    def getitem(self):
        # return next frame in the queue
        return self.Q.get()

    def videoinfo(self):
        # indicate the video info
        # NOTE: self.stream was commented out in __init__ above, so this method will fail
        # unless you restore a capture handle (only needed when saving video)
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)), int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)
    
    def len(self):
        # return queue size
        return self.Q.qsize()

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True


class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=0):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        # initialize the queue used to store frames read from
        # the video file
        self.Q = LifoQueue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping the whole dataset
        
        while True:
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            with self.dataloder.Q.mutex:
                self.dataloder.Q.queue.clear()
            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # print(len(prediction))
                # NMS process
                dets = dynamic_write_results(prediction, opt.confidence,
                                    opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
                
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list,0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                
                dets[:, 1:5] /= scaling_factor
                # print('dets.shape='+str(dets.shape))
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:,0]==k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                
                inp = im_to_torch(cv2.cvtColor(orig_img[k], cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes_k, inps, pt1, pt2)

                self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:,0]==k], inps, pt1, pt2))
  
    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()

class DataWriter:
    def __init__(self, save_video=False,
                savepath='examples/res/1.avi', fourcc=cv2.VideoWriter_fourcc(*'XVID'), fps=25, frameSize=(640,480),
                queueSize=1024):
        if save_video:
            # initialize the file video stream along with the boolean
            # used to indicate if the thread should be stopped or not
            self.stream = cv2.VideoWriter(savepath, fourcc, fps, frameSize)
            assert self.stream.isOpened(), 'Cannot open video for writing'
        self.save_video = save_video
        self.stopped = False
        self.final_result = []
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)
        if opt.save_img:
            if not os.path.exists(opt.outputpath + '/vis'):
                os.mkdir(opt.outputpath + '/vis')

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely
        # i = 0
        while True:
            # if the thread indicator variable is set, stop the
            # thread
            if self.stopped:
                if self.save_video:
                    self.stream.release()
                return
            # otherwise, ensure the queue is not empty
            if not self.Q.empty():
                (boxes, scores, hm_data, pt1, pt2, orig_img, im_name) = self.Q.get()
                orig_img = np.array(orig_img, dtype=np.uint8)
                pic_name = im_name.split('_')[0]
                if boxes is None:
                    if opt.save_img or opt.save_video or opt.vis:
                        img = orig_img
                        if opt.vis:
                            cv2.namedWindow('AlphaPose Demo_{}'.format(pic_name), cv2.WINDOW_NORMAL)
                            cv2.imshow("AlphaPose Demo_{}".format(pic_name), img)
                            cv2.waitKey(30)
                        if opt.save_img:
                            cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img)
                        if opt.save_video:
                            self.stream.write(img)
                else:
                    # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                    
                    preds_hm, preds_img, preds_scores = getPrediction(
                        hm_data, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)

                    result = pose_nms(boxes, scores, preds_img, preds_scores)
                    result = {
                        'imgname': im_name,
                        'result': result
                    }
                    self.final_result.append(result)
                    if opt.save_img or opt.save_video or opt.vis:
                        img = vis_frame(orig_img, result)
                        if opt.vis:
                            # print('im_name='+str(im_name))
                            cv2.namedWindow('AlphaPose Demo_{}'.format(pic_name), cv2.WINDOW_NORMAL)
                            cv2.imshow("AlphaPose Demo_{}".format(pic_name), img)
                            cv2.waitKey(30)
                        if opt.save_img:
                            cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img)
                        if opt.save_video:
                            self.stream.write(img)
            else:
                time.sleep(0.1)

    def running(self):
        # indicate that the thread is still running
        time.sleep(0.2)
        return not self.Q.empty()

    def save(self, boxes, scores, hm_data, pt1, pt2, orig_img, im_name):
        # save next frame in the queue
        self.Q.put((boxes, scores, hm_data, pt1, pt2, orig_img, im_name))

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
        time.sleep(0.2)

    def results(self):
        # return final result
        return self.final_result

    def len(self):
        # return queue len
        return self.Q.qsize()

class Mscoco(data.Dataset):
    def __init__(self, train=True, sigma=1,
                 scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'):
        self.img_folder = '../data/coco/images'    # root image folders
        self.is_train = train           # training set or test set
        self.inputResH = opt.inputResH
        self.inputResW = opt.inputResW
        self.outputResH = opt.outputResH
        self.outputResW = opt.outputResW
        self.sigma = sigma
        self.scale_factor = scale_factor
        self.rot_factor = rot_factor
        self.label_type = label_type

        self.nJoints_coco = 17
        self.nJoints_mpii = 16
        self.nJoints = 33

        self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8,
                        9, 10, 11, 12, 13, 14, 15, 16, 17)
        self.flipRef = ((2, 3), (4, 5), (6, 7),
                        (8, 9), (10, 11), (12, 13),
                        (14, 15), (16, 17))

    def __getitem__(self, index):
        pass

    def __len__(self):
        pass


def crop_from_dets(img, boxes, inps, pt1, pt2):
    '''
    Crop human from origin image according to Dectecion Results
    '''

    imght = img.size(1)
    imgwidth = img.size(2)
    tmp_img = img
    tmp_img[0].add_(-0.406)
    tmp_img[1].add_(-0.457)
    tmp_img[2].add_(-0.480)
    for i, box in enumerate(boxes):
        upLeft = torch.Tensor(
            (float(box[0]), float(box[1])))
        bottomRight = torch.Tensor(
            (float(box[2]), float(box[3])))

        ht = bottomRight[1] - upLeft[1]
        width = bottomRight[0] - upLeft[0]
        if width > 100:
            scaleRate = 0.2
        else:
            scaleRate = 0.3

        upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
        upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
        bottomRight[0] = max(
            min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2), upLeft[0] + 5)
        bottomRight[1] = max(
            min(imght - 1, bottomRight[1] + ht * scaleRate / 2), upLeft[1] + 5)

        inps[i] = cropBox(tmp_img.clone(), upLeft, bottomRight, opt.inputResH, opt.inputResW)
        pt1[i] = upLeft
        pt2[i] = bottomRight

    return inps, pt1, pt2
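
One limitation worth noting: the DataWriter above still uses a single cv2.VideoWriter, so with save_video enabled all streams end up interleaved in one file. If you want one output file per camera, a dict of writers keyed by pic_name inside update() works; a minimal sketch (the output path and the fixed 25 fps are assumptions, and self.writers would be initialised as an empty dict in __init__):

    # Sketch: lazily create one VideoWriter per camera stream
    pic_name = im_name.split('_')[0]
    if pic_name not in self.writers:
        out_path = os.path.join(opt.outputpath, 'AlphaPose_{}.avi'.format(pic_name))
        self.writers[pic_name] = cv2.VideoWriter(
            out_path, cv2.VideoWriter_fourcc(*'XVID'), 25, (img.shape[1], img.shape[0]))
    self.writers[pic_name].write(img)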
