AI辅助瞄准系统开发与实战(二)

文章目录

  • 前言
  • Yolov5 Detect
  • 鼠标控制
    • 监听
    • 目标跟踪
  • 控制器
    • 算法整合
  • 总结

前言

回顾上文,在这里的话,我完成了基本的GUI界面的设计,和窗口图像的工具代码。当然这个绘制窗口的代码有点问题,那就是,绘制的窗口有闪烁。我看了很多解决方案,都试了,包括双缓存方案,都存在一定的问题,导致绘制的窗口存在闪烁问题。当然也有可能是游戏问题导致的,具体是啥,我实在是没有时间去搞了。

Yolov5 Detect

ok,现在我们可以开始准备整合我们的算法了。当然现在值得一提的是,现在我还没有游戏人物的数据集,因此,我这里做的还是coco,后面再换成游戏的。只需要重新训练即可,然后写一下过滤的标签,就可以了。


class YoloDectect():
    """ONNX Runtime wrapper around a YOLOv5-Lite detection model.

    The class loads the model and the class-name file, resizes frames to
    the network input size, decodes the raw head output into pixel
    coordinates and returns the detections that survive the confidence
    filter and non-maximum suppression.
    """

    def __init__(self,
                 model_pb_path=r'F:\projects\PythonProject\YOLOv5-Lite-master\weights\v5lite-s.onnx',
                 label_path='coco.names',
                 confThreshold=0.6,
                 nmsThreshold=0.3,
                 objThreshold=0.6):
        """Load the model and the label file and set up decoding constants.

        :param model_pb_path: path of the ONNX model file.
        :param label_path: text file with one class name per line.
        :param confThreshold: minimum class score for a detection to be kept.
        :param nmsThreshold: threshold handed to ``cv2.dnn.NMSBoxes``.
        :param objThreshold: minimum objectness (column 4 of the output).
        """
        so = ort.SessionOptions()
        so.log_severity_level = 3
        self.net = ort.InferenceSession(model_pb_path, so)
        # Close the label file deterministically instead of leaking the handle.
        with open(label_path, 'r') as label_file:
            self.classes = [line.strip() for line in label_file]
        self.num_classes = len(self.classes)
        # Anchor sizes obtained by clustering the dataset, one row per
        # detection layer, stored as flattened (w, h) pairs.
        anchors = [[10, 13, 16, 30, 33, 23],
                   [30, 61, 62, 45, 59, 119],
                   [116, 90, 156, 198, 373, 326]
                   ]
        self.nl = len(anchors)           # number of detection layers
        self.na = len(anchors[0]) // 2   # anchors per layer
        self.no = self.num_classes + 5   # values per prediction row
        # Placeholder grids; the real ones are built lazily in detect().
        self.grid = [np.zeros(1)] * self.nl
        self.stride = np.array([8., 16., 32.])
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold
        # (height, width) taken from dims 2 and 3 of the first model input.
        model_input = self.net.get_inputs()[0]
        self.input_shape = (model_input.shape[2], model_input.shape[3])

    def resize_image(self, srcimg, keep_ratio=True):
        """Resize an image to the network input size.

        :param srcimg: source image array (height, width, channels).
        :param keep_ratio: when True and the image is not square, scale the
            image and pad it with black borders instead of stretching it.
            The aspect ratio is preserved exactly only when the network
            input itself is square.
        :return: ``(img, newh, neww, top, left)`` where ``newh``/``neww`` is
            the size of the scaled image content and ``top``/``left`` the
            padding added above / to the left of it.
        """
        top, left, newh, neww = 0, 0, self.input_shape[0], self.input_shape[1]
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                # Taller than wide: fill the height, pad left and right.
                newh, neww = self.input_shape[0], int(self.input_shape[1] / hw_scale)
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((self.input_shape[1] - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, self.input_shape[1] - neww - left, cv2.BORDER_CONSTANT,
                                         value=0)  # add border
            else:
                # Wider than tall: fill the width, pad top and bottom.
                newh, neww = int(self.input_shape[0] * hw_scale), self.input_shape[1]
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((self.input_shape[0] - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, self.input_shape[0] - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=0)
        else:
            # cv2.resize takes dsize as (width, height) while input_shape is
            # stored as (height, width).
            img = cv2.resize(srcimg, (self.input_shape[1], self.input_shape[0]), interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def _make_grid(self, nx=20, ny=20):
        """Return the cell offsets of an ``nx`` x ``ny`` feature map.

        :param nx: number of cells along x (feature-map width).
        :param ny: number of cells along y (feature-map height).
        :return: float32 array of shape ``(ny * nx, 2)`` holding ``(x, y)``
            per cell in row-major order (x varies fastest).
        """
        # meshgrid(x, y) yields arrays of shape (ny, nx); passing the axes the
        # other way round only works when nx == ny.
        xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

    def postprocess(self, frame, outs, pad_hw):
        """Filter the decoded predictions and run NMS.

        :param frame: the original image; only its height and width are used.
        :param outs: decoded predictions, one row per candidate laid out as
            ``[cx, cy, w, h, objectness, class scores...]``.
        :param pad_hw: ``(newh, neww, padh, padw)`` as produced by
            :meth:`resize_image`.
        :return: list of ``{'box': [left, top, width, height], 'cls': name,
            'conf': score, 'id': class index}`` in original-image pixels.
        """
        newh, neww, padh, padw = pad_hw
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        # Scale factors from the resized content back to the original frame.
        ratioh, ratiow = frameHeight / newh, frameWidth / neww
        classIds = []
        confidences = []
        boxes = []
        outs = outs[outs[:, 4] > self.objThreshold]
        for detection in outs:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > self.confThreshold:
                center_x = int((detection[0] - padw) * ratiow)
                center_y = int((detection[1] - padh) * ratioh)
                width = int(detection[2] * ratiow)
                height = int(detection[3] * ratioh)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        # Depending on the OpenCV version the result is an (N, 1) array, a
        # flat array or an empty tuple; normalise it to a flat index array.
        keep = np.array(indices, dtype=np.int64).reshape(-1)
        return [
            {'box': boxes[i], 'cls': self.classes[classIds[i]], 'conf': confidences[i], 'id': classIds[i]}
            for i in keep
        ]

    def detect(self, srcimg):
        """Run the detector on one BGR image.

        :param srcimg: image in BGR channel order (it is converted to RGB
            before inference).
        :return: the detection list described in :meth:`postprocess`.
        """
        img, newh, neww, top, left = self.resize_image(srcimg)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        # HWC -> NCHW with a batch dimension of 1.
        blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
        input_name = self.net.get_inputs()[0].name
        outs = self.net.run(None, {input_name: blob})[0].squeeze(axis=0)
        row_ind = 0
        for i in range(self.nl):
            h, w = int(self.input_shape[0] / self.stride[i]), int(self.input_shape[1] / self.stride[i])
            length = int(self.na * h * w)
            # Grids are stored flattened as (h * w, 2); rebuild only when the
            # cached one does not match this layer.
            if self.grid[i].shape != (h * w, 2):
                self.grid[i] = self._make_grid(w, h)
            # Decode centre and size in place.
            # NOTE(review): presumably the exported model already applies the
            # sigmoid to these columns -- confirm against the export script.
            outs[row_ind:row_ind + length, 0:2] = (outs[row_ind:row_ind + length, 0:2] * 2. - 0.5 + np.tile(
                self.grid[i], (self.na, 1))) * int(self.stride[i])
            outs[row_ind:row_ind + length, 2:4] = (outs[row_ind:row_ind + length, 2:4] * 2) ** 2 * np.repeat(
                self.anchor_grid[i], h * w, axis=0)
            row_ind += length
        results = self.postprocess(srcimg, outs, (newh, neww, top, left))
        return results



算法的整合部分其实,就是先前我玩的 YOLOV5-Lite的部署代码。只是我把输出数据改了。

鼠标控制

之后还是我们的鼠标控制功能。昨天我们虽然做了,但是那是很简陋的,没有把实际的功能进行整合。在这里我们主要有两个块。

监听

这里我要单独说是因为,这里遇到了bug,我这里使用的是pynput 进行监听,里面有一个Listener,但是当我整合GUI的时候,发生线程的问题。先前我的解决方案是,打算直接用win32造轮子的,但是代码都写完了,很难改了。最后没办法,找了个折中的方案。


    #停止枪口定位的,因为很多功能和鼠标左键绑定,需要进行操作的时候,就需要处理好这个
    def __func_stop_gan(self, key):
        """Keyboard callback: F12 switches off ``listener_left_mu``.

        The switch only happens while ``listener_btn_mu`` is truthy; every
        other key is ignored.

        :param key: the key object delivered by the keyboard listener.
        """
        is_f12 = key == keyboard.Key.f12
        if not is_f12:
            return
        if self.listener_btn_mu:
            self.listener_left_mu = False

    def __start_listener(self):
        """Create (if needed) and start the mouse listener bound to ``__func_stay_gan``.

        Does nothing unless ``self.star_listener_flag`` is set. The listener
        is started at most once: the article states this is pynput's
        ``Listener``, which is a ``threading.Thread``, and calling
        ``start()`` a second time on the same thread object raises
        ``RuntimeError``. The previous polling loop hit exactly that on its
        third iteration.
        """
        if not self.star_listener_flag:
            return
        if not self.listener:
            self.listener = Listener(on_click=self.__func_stay_gan)
        # ``ident`` stays None until the thread has been started.
        if self.listener.ident is None:
            self.listener.start()

    def __start_listener_right(self):
        """Create (if needed) and start the mouse listener bound to ``__func_stay_track``.

        Does nothing unless ``self.star_listener_right_flag`` is set. The
        listener is started at most once: the article states this is
        pynput's ``Listener``, which is a ``threading.Thread``, and calling
        ``start()`` a second time on the same thread object raises
        ``RuntimeError``. The previous polling loop hit exactly that on its
        third iteration.
        """
        if not self.star_listener_right_flag:
            return
        if not self.listener_right:
            self.listener_right = Listener(on_click=self.__func_stay_track)
        # ``ident`` stays None until the thread has been started.
        if self.listener_right.ident is None:
            self.listener_right.start()


    def __start_listener_btn(self):
        """Create (if needed) and start the keyboard listener bound to ``__func_stop_gan``.

        Does nothing unless ``self.star_listener_btn_flag`` is set. The
        listener is started at most once: the article states this is
        pynput's listener, which is a ``threading.Thread``, and calling
        ``start()`` a second time on the same thread object raises
        ``RuntimeError``. The previous polling loop hit exactly that on its
        third iteration.
        """
        if not self.star_listener_btn_flag:
            return
        if not self.listener_btn:
            self.listener_btn = keyboard.Listener(on_press=self.__func_stop_gan)
        # ``ident`` stays None until the thread has been started.
        if self.listener_btn.ident is None:
            self.listener_btn.start()

    def func_run_mouse(self):
        """Enable the mouse features and launch the three listener tasks.

        Returns immediately when ``self.listener_started`` is already set.
        Otherwise all feature switches and listener run-flags are turned on,
        ``listener_started`` is set, and the two mouse listener starters and
        the keyboard listener starter are submitted to
        ``self.draw_helper_pool``.

        :return: None
        """
        if self.listener_started:
            return

        # Feature switches for the keyboard hook and both mouse buttons.
        self.listener_btn_mu = self.listener_left_mu = self.listener_right_mu = True

        # Run-flags read by the listener starter tasks.
        self.star_listener_flag = self.star_listener_right_flag = self.star_listener_btn_flag = True

        # tracking_open and stop_stay_location_flag are deliberately not set
        # here: they are only enabled after a mouse button is pressed.
        self.listener_started = True
        starters = (
            self.__start_listener,
            self.__start_listener_right,
            self.__start_listener_btn,
        )
        for starter in starters:
            self.draw_helper_pool.execute(starter)

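再开启一个线程,把监听逻辑写进死循环,通过结束标志位控制循环退出,然后一直轮询 start(),以手动确保监听处于运行状态。需要注意的是,pynput 的 Listener 继承自 threading.Thread,同一个对象只能成功 start() 一次,重复调用会抛出 RuntimeError,因此实际上只有第一次 start() 真正生效。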

目标跟踪

之后是目标跟踪。
这里的话,我们项目启动的时候,将启动大概两个线程:一个是识别的,另一个是跟踪的。识别线程不断更新最新的目标位置,跟踪线程则不断移动到那里去。由于我的目的是保证尽可能到最新的位置上去,因此完全不用担心读写锁的问题,没有必要等我移动到那个位置之后,再识别,然后再移动,直接读就好了。一来是实现简单,二来是这样看起来有一点连贯性,同时算法的算力消耗低。而且本身在这里我也设置了两个帧数,一个是算法识别的帧数,一个是窗口绘制的帧数。

    def func_track_ing(self):
        """Submit a background task that keeps moving the mouse to the nearest target.

        The task loops while ``self.tracking_open`` is truthy; set it to
        False to end tracking. On every pass it picks, among the entries of
        ``self.items`` accepted by ``ScreenUtils.fitiler``, the box whose
        centre is closest to the current cursor position and moves the mouse
        there. Filtering for head versus body could be added later; for now
        the target nearest to the cursor is followed.
        """
        def tracking():
            while self.tracking_open:
                # Per the author's note: the cursor position is reported at
                # 100% scaling, whereas the boxes in self.items come from a
                # raw screenshot taken at 125% scaling.
                cursor_x, cursor_y = pydirectinput.position()
                nearest = None
                nearest_dist = float('inf')
                for item in self.items:
                    if not ScreenUtils.fitiler(item):
                        continue
                    box = item['box']
                    # Box is [left, top, width, height]; take its centre.
                    mid_x = (box[0] + box[0] + box[2]) // 2
                    mid_y = (box[1] + box[1] + box[3]) // 2
                    gap = ((mid_x - cursor_x) ** 2 + (mid_y - cursor_y) ** 2) ** 0.5
                    if gap < nearest_dist:
                        nearest_dist = gap
                        nearest = (mid_x, mid_y)
                if nearest is not None:
                    self.move_mouse(nearest[0], nearest[1], 0.5, 1)

        self.draw_helper_pool.execute(tracking)

控制器

之后就是我们的控制器了,这个控制器主要是做整合。

class MyController:
    """Global controller that ties detection, overlay drawing and mouse control together."""

    def __init__(self):
        """Create the worker pool, the detector, the drawer and the mouse helper."""
        self.pools = ThreadPoolManager(max_workers=3)
        self.net = YoloDectect()
        self.drawer = RectangleDrawer()
        self.go = True            # main-loop run flag, cleared by stop_all()
        self.move_mouse = MoveMouse()
        self.drawer_flag = True   # whether detection boxes are drawn
        self.alg_open = True      # whether detection runs at all

    def check_open_gan_gui(self):
        """GUI hook: switch on the feature gated by ``listener_left_mu``.

        :return: None
        """
        self.move_mouse.listener_left_mu = True

    def check_stop_gan_gui(self):
        """GUI hook: switch off the feature gated by ``listener_left_mu``."""
        self.move_mouse.listener_left_mu = False

    def check_open_draw_gui(self):
        """GUI hook: enable drawing of detection boxes."""
        self.drawer_flag = True

    def check_stop_draw_gui(self):
        """GUI hook: disable drawing of detection boxes."""
        self.drawer_flag = False

    def check_open_alg_gui(self):
        """GUI hook: enable the detection algorithm."""
        self.alg_open = True

    def check_stop_alg_gui(self):
        """GUI hook: disable the detection algorithm."""
        self.alg_open = False

    def check_open_track_gui(self):
        """GUI hook: switch on the feature gated by ``listener_right_mu``."""
        self.move_mouse.listener_right_mu = True

    def check_stop_track_gui(self):
        """GUI hook: switch off the feature gated by ``listener_right_mu``."""
        self.move_mouse.listener_right_mu = False

    def start(self):
        """Start every feature: mouse control first, then the detection loop."""
        # 1. Mouse control.
        self.move_mouse.func_run_mouse()
        # 2. Detection and drawing.
        self.drawer_flag = True
        self.alg_open = True
        self.go = True
        # NOTE(review): func_run_mouse() uses listener_started as its
        # "already running" guard, so clearing it here looks inverted (a
        # second start() would launch the listeners again) -- confirm
        # against func_stop_mouse() before changing.
        self.move_mouse.listener_started = False
        # Detection at 10 fps, overlay drawing at 70 fps.
        self.pools.execute(self.runing, 10, 70)

    def runing(self, fps, draw_fps):
        """Detection loop: grab the screen, detect, publish the results.

        Runs until ``self.go`` becomes falsy.

        :param fps: detection rate in frames per second; it does not need to
            be high. Must be greater than zero.
        :param draw_fps: frame rate handed to the drawer; this can be higher.
        :return: None
        """
        w, h = ScreenUtils.get_real_resolution()
        self.monitor = {"top": 0, "left": 0, "width": w, "height": h}
        self.mss_obj = mss.mss()
        try:
            self.drawer.drawRectanglesBySelf(fps=draw_fps)
            need_p_time = 1 / fps
            while self.go:
                start_time = time.time()
                if self.alg_open:
                    # Grab the screen.
                    screenshot = self.mss_obj.grab(self.monitor)
                    # mss delivers BGRA pixels; drop the alpha channel and
                    # keep BGR order, which is what detect() expects.
                    # COLOR_RGB2BGR would swap red and blue.
                    screenshot_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGRA2BGR)
                    results = self.net.detect(screenshot_cv)
                    self.drawer.setItems(results)
                    self.move_mouse.setItems(results)
                else:
                    # Detection is off: clear stale targets.
                    self.drawer.setItems([])
                    self.move_mouse.setItems([])
                # Mirror the drawing switch onto the drawer.
                self.drawer.draw_recgs = bool(self.drawer_flag)
                # Sleep for whatever remains of this frame's time budget.
                dt = need_p_time - (time.time() - start_time)
                if dt > 0:
                    time.sleep(dt)
        finally:
            # Release the screen-capture handle when the loop ends.
            self.mss_obj.close()

    def start_pause(self):
        """Suspend everything by clearing the feature flags; nothing is stopped."""
        self.move_mouse.listener_right_mu = False
        self.move_mouse.listener_left_mu = False
        self.move_mouse.listener_btn_mu = False
        self.drawer_flag = False
        self.alg_open = False

    def stop_pause(self):
        """Resume after :meth:`start_pause` by setting the feature flags again."""
        self.move_mouse.listener_right_mu = True
        self.move_mouse.listener_left_mu = True
        self.move_mouse.listener_btn_mu = True
        self.drawer_flag = True
        self.alg_open = True

    def stop_all(self):
        """Stop the mouse features and let the detection loop exit."""
        self.move_mouse.func_stop_mouse()
        self.go = False
        self.drawer_flag = False
        self.alg_open = False
        # NOTE(review): leaving listener_started True makes the next
        # func_run_mouse() return early -- this looks inverted; confirm
        # against func_stop_mouse().
        self.move_mouse.listener_started = True

到这里,我们的控制器就基本做好了。
里面主要包括了如下功能

算法整合

这个部分的代码主要是这里:

 def runing(self, fps, draw_fps):
        """Detection loop: grab the screen, detect, publish the results.

        Runs until ``self.go`` becomes falsy.

        :param fps: detection rate in frames per second; it does not need to
            be high. Must be greater than zero.
        :param draw_fps: frame rate handed to the drawer; this can be higher.
        :return: None
        """
        w, h = ScreenUtils.get_real_resolution()
        self.monitor = {"top": 0, "left": 0, "width": w, "height": h}
        self.mss_obj = mss.mss()
        try:
            self.drawer.drawRectanglesBySelf(fps=draw_fps)
            need_p_time = 1 / fps
            while self.go:
                start_time = time.time()
                if self.alg_open:
                    # Grab the screen.
                    screenshot = self.mss_obj.grab(self.monitor)
                    # mss delivers BGRA pixels; drop the alpha channel and
                    # keep BGR order, which is what detect() expects.
                    # COLOR_RGB2BGR would swap red and blue.
                    screenshot_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGRA2BGR)
                    results = self.net.detect(screenshot_cv)
                    self.drawer.setItems(results)
                    self.move_mouse.setItems(results)
                else:
                    # Detection is off: clear stale targets.
                    self.drawer.setItems([])
                    self.move_mouse.setItems([])
                # Mirror the drawing switch onto the drawer.
                self.drawer.draw_recgs = bool(self.drawer_flag)
                # Sleep for whatever remains of this frame's time budget.
                dt = need_p_time - (time.time() - start_time)
                if dt > 0:
                    time.sleep(dt)
        finally:
            # Release the screen-capture handle when the loop ends.
            self.mss_obj.close()

然后我们有很多标志位去控制。

总结

ok,那么这篇文章就先到这里,我们后面完成全部整合

你可能感兴趣的:(人工智能,人工智能)