yolo V5学习笔记

yolo V5学习笔记2021.8.5

新手上路学习自用

xyxy2xywh(x)示例

def xyxy2xywh(x):
    """Convert an (n, 4) set of boxes from corner format to center format.

    Input columns are [x1, y1, x2, y2], where (x1, y1) is the top-left corner
    and (x2, y2) the bottom-right corner. Output columns are
    [x_center, y_center, width, height].

    A ``torch.Tensor`` input is cloned and anything else is copied with
    ``np.copy``; results are written into that copy, so ``x`` is left
    untouched and the output keeps the dtype of the input.
    """
    if isinstance(x, torch.Tensor):
        boxes = x.clone()
    else:
        boxes = np.copy(x)

    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    boxes[:, 0] = (left + right) / 2  # x center
    boxes[:, 1] = (top + bottom) / 2  # y center
    boxes[:, 2] = right - left  # width
    boxes[:, 3] = bottom - top  # height
    return boxes
    
# Two sample boxes in corner form: [x1, y1, x2, y2].
bbox1 = [100.0, 100.0, 200.0, 200.0]
bbox2 = [120.0, 120.0, 220.0, 220.0]
# Stack the boxes row-wise so that x has one box per row (shape 2x4).
x = np.asarray([bbox1, bbox2])
'''
	x = [[100., 100., 200., 200.]
	     [120., 120., 220., 220.]]
'''

二维数组[a,b],a是行、b是列;
X[:,0] , 取二维数组的第一列所有行的数据
X[1,:], 取二维数组的第二行所有列的数据
X[:,m:n] ,取二维数组的第m列到第n-1列所有行的数据
X[m:n,:], 取二维数组的第m行到第n-1行所有列的数据


python .view示例

# normalized xywh
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  

out = out.view(4, -1)
view方法是对已有的张量进行reshape(不改变元素个数)
参数中的-1表示该维度的大小自动推算:这里把out reshape成4行,列数 = 元素总数 / 4

batchsize, channels, height, width = x.data.size()
channels_per_group = int(channels / self.groups) # 每组几个通道
x = x.view(batchsize, self.groups, channels_per_group, height, width) # reshape
x = x.view(batchsize, -1, height, width)

用x.data.size() 得到X的四个维度,然后第二行求出来第三行需要的一个参数,第三行的view第0,3,4都是x.data.size() 得到的,没有发生变化,因此view方法很聪明就将channels进行reshape。
第四行相当于逆操作,其他三维都不变化,-1让view自动把中间的第1,2维(groups和channels_per_group)重新合并成一个channels维度.


if else示例

dir = save_dir / 'labels' if save_txt else save_dir

#等价于
if save_txt:
	dir = save_dir / 'labels'
else:
	dir = save_dir

python 在列表,元组,字典变量前加*号

xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format

在列表、元组、字典,由numpy生成的向量前加*号,会将其拆分成一个一个的独立元素;

list = [1,2,3,4]
tuple = (1,2,3,4)
dict = {'1':'a','2':'b','3':'c',}
range = np.arange(1,3)

加*号后:
*list = 1 2 3 4
*tuple = 1 2 3 4
*dict = 1 2 3    (字典拆分后得到的是键,不是值)
*range = 1 2

最后需要注意的是:

1.可迭代对象才可以使用*号拆分;
2.带 * 号变量严格来说并不是一个变量,而更应该称为参数,它是不能赋值给其他变量的,但可以作为参数传递;


torch.tensor

im0.shape = (3000, 4000, 3)
torch.tensor(im0.shape)
#结果为tensor([3000, 4000,    3])
torch.tensor(im0.shape)[[1, 0, 1, 0]]
#结果为tensor([4000, 3000, 4000, 3000])
>>> import torch
>>> a = [100, 200, 3]
>>> torch.tensor(a)
tensor([100, 200,   3])
>>> b = torch.tensor(a)
>>> b[[1,0,1,0]]
tensor([200, 100, 200, 100])

xywh归一化转换为比例

gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
xywh_my = xyxy2xywh(torch.tensor(xyxy).view(1, 4)).view(-1).tolist()
line = (cls, *xywh_my, conf) if save_conf else (cls, *xywh_my)  # label format
*xywh 

'''
line 的结果如下:
cls 为物体识别标志代码; *xywh_my为列表的独立元素; conf为置信度
'''
#xywh的line
14 0.266625 0.806333 0.02775 0.0233333 0.370673
#xywh_my的line
14 1066.5 2419 111 70 0.370673

已知im0.shape = (3000, 4000, 3),所以gn = [4000, 3000, 4000, 3000](即 w, h, w, h)
所以1066.5 /4000 = 0.266625
    2419   /3000 = 0.806333 

plot_one_box()

def plot_one_box(x, im, color=(128, 128, 128), label=None, line_thickness=3):
    """Draw one bounding box, and optionally its label, on image ``im`` with OpenCV.

    ``x`` supplies the box corners as x[0..3] = x1, y1, x2, y2. The box outline
    is always drawn; when ``label`` is truthy, a filled rectangle in ``color``
    is drawn just above the top-left corner and the label text is written on it.
    Nothing is returned; all drawing goes through the ``cv2`` calls on ``im``.
    """
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    # A falsy line_thickness falls back to a thickness derived from the image size.
    thickness = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1
    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(im, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)
    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # Opposite corner of the filled label background, sized to fit the text.
    background_corner = (top_left[0] + text_size[0], top_left[1] - text_size[1] - 3)
    cv2.rectangle(im, top_left, background_corner, color, -1, cv2.LINE_AA)  # filled
    text_origin = (top_left[0], top_left[1] - 2)
    cv2.putText(im, label, text_origin, 0, font_scale, [225, 255, 255], thickness=font_thickness, lineType=cv2.LINE_AA)

plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness)


plot_one_box()中cv2.putText() 加入FPS参数

原始程序

def plot_one_box(x, im, color=(128, 128, 128), label=None, line_thickness=3):
    """Plot a single bounding box (plus an optional label) on image ``im`` using OpenCV.

    ``x`` is indexed as x[0..3] = x1, y1, x2, y2. The outline is drawn in
    ``color``; if ``label`` is truthy, a filled background in the same color
    is drawn above the top-left corner and the label is written over it.
    """
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    line_w = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    font_w = max(line_w - 1, 1)  # font thickness
    x1, y1, x2, y2 = int(x[0]), int(x[1]), int(x[2]), int(x[3])
    cv2.rectangle(im, (x1, y1), (x2, y2), color, thickness=line_w, lineType=cv2.LINE_AA)

    if not label:
        return
    scale = line_w / 3
    (text_w, text_h), _ = cv2.getTextSize(label, 0, fontScale=scale, thickness=font_w)
    # The filled rectangle acts as the background plate behind the text.
    cv2.rectangle(im, (x1, y1), (x1 + text_w, y1 - text_h - 3), color, -1, cv2.LINE_AA)
    cv2.putText(im, label, (x1, y1 - 2), 0, scale, [225, 255, 255], thickness=font_w, lineType=cv2.LINE_AA)

对其修改,加入FPS帧数显示:引入t1和t2变量,计算video_frame

def plot_one_box(x, im, t1, t2, color=(128, 128, 128), label=None, line_thickness=3):
    """Draw one bounding box on ``im`` and append an FPS readout to its label.

    Args:
        x: Box corners, read as x[0..3] = x1, y1, x2, y2.
        im: Image the OpenCV calls draw on.
        t1: Timestamp taken before the timed work (presumably seconds).
        t2: Timestamp taken after the timed work; FPS is 1 / (t2 - t1).
        color: Color of the box outline and of the label background.
        label: Text for the box; no text is drawn when it is falsy.
        line_thickness: Outline thickness; a falsy value derives it from the image size.
    """
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    tf = max(tl - 1, 1)  # font thickness
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    # Two timestamps can coincide on a coarse clock; show 0 FPS instead of
    # raising ZeroDivisionError.
    elapsed = t2 - t1
    video_frame = 1 / elapsed if elapsed > 0 else 0
    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)

    if label:
        content = f'{label} {int(video_frame)}FPS'
        # t_size is the pixel size of the text; it sizes the filled background plate.
        t_size = cv2.getTextSize(content, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled background plate
        cv2.putText(im, content, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
                    图片;  内容;    文字左下角坐标;  字体类型(0);  字体缩放比例;  颜色;   文字线条粗细;   线型;

udp_send()

xywh_my = xyxy2xywh(torch.tensor(xyxy).view(1, 4)).view(-1).tolist()
x_y = xywh_my[0],xywh_my[1] #两个纯数 x y
data_x = xywh_my[0]
data_y = xywh_my[1]

udp_send(data_x ,data_y)

def udp_send(x,y):
    """Send the pair (x, y) to 127.0.0.1:8000 over UDP and print each reply.

    NOTE: this is the first, non-working version. ``sendData`` is a list, so
    the ``sendData.encode('utf-8')`` call below raises
    ``AttributeError: 'list' object has no attribute 'encode'``.
    """
    # build socket object
    # SOCK_DGRAM    udp model
    s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
    # send data  bytes

    # for data in [b'Michael', b'Tracy', b'Sarah']:
    sendArr = ('127.0.0.1', 8000)  # destination (host, port)
    sendData = []
    # The loop has no exit condition, and the list is never cleared, so every
    # pass appends x and y again and the list keeps growing.
    while True:
        sendData.append(x)
        sendData.append(y)
        print(sendData)
        # Fails here: a list has no .encode(); the elements must be joined into a str first.
        s.sendto(sendData.encode('utf-8'), sendArr)  # utf-8,gbk
        # Waits for up to 1024 bytes of reply and prints it decoded as UTF-8.
        print(s.recv(1024).decode('utf-8'))

报错:
File "F:/CODE/yoloV5/yolov5-master/detect.py", line 166, in run
    udp_send(data_x ,data_y)
File "F:\CODE\yoloV5\yolov5-master\udp\send.py", line 17, in udp_send
    s.sendto(sendData.encode('utf-8'), sendArr)  # utf-8,gbk
AttributeError: 'list' object has no attribute 'encode'

解决方式:
使用 join() 函数,将列表中字符串使用某种字符串连接,形式——str.join(list) ,示例:

# str.join() concatenates a list of strings, inserting the separator between items.
a = ['abc', '123', 'xyz789']
b = ','
c = b.join(a)
print(c)
print(type(c))  # the result of join() is a str

运行结果:
abc,123,xyz789
<class 'str'>
s.sendto(','.join(sendData).encode('utf-8'), sendArr)  # utf-8,gbk

结果又报错:
TypeError: sequence item 0: expected str instance, float found

修改后:
s.sendto(','.join(str(data) for data in sendData).encode('utf-8'), sendArr)  # utf-8,gbk
sendData.clear() #清空sendData列表,否则会越来越多

查了资料后得知:str.join() 要求序列中的每个元素都是字符串,list里包含数字(如float)时不能直接join,需要先把每个元素转成字符串。

这里sendData为纯数字

解决办法:print(' '.join(str(Data) for Data in sendData)) (建议)
或 print(" ".join('%s' % Data for Data in sendData))

即遍历list的元素,把他转化成字符串。这样就能成功输出 数字,数字 结果了。


画面中只画“人”类的框和标签

原程序(原版+FPS的):

def plot_one_box(x, im, t1, t2, color=(128, 128, 128), label=None, line_thickness=3):
    """Plot one bounding box on image ``im`` using OpenCV, with an FPS suffix on the label.

    Args:
        x: Box corners, read as x[0..3] = x1, y1, x2, y2.
        im: Image the OpenCV calls draw on.
        t1: Timestamp taken before the timed work (presumably seconds).
        t2: Timestamp taken after the timed work; FPS is 1 / (t2 - t1).
        color: Color of the box outline and of the label background.
        label: Text for the box; no text is drawn when it is falsy.
        line_thickness: Outline thickness; a falsy value derives it from the image size.
    """
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    tf = max(tl - 1, 1)  # font thickness
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    # Guard against t2 == t1 (possible with a coarse clock): report 0 FPS
    # rather than raising ZeroDivisionError.
    elapsed = t2 - t1
    video_frame = 1 / elapsed if elapsed > 0 else 0
    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)

    if label:
        content = f'{label} {int(video_frame)}FPS'
        t_size = cv2.getTextSize(content, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(im, content, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

修改后:

def plot_one_box(x, im, t1, t2, color=(128, 128, 128), conf=None, label=None, line_thickness=3):
    """Plot a bounding box on ``im`` with OpenCV, but only for the 'person' class.

    Args:
        x: Box corners, read as x[0..3] = x1, y1, x2, y2.
        im: Image the OpenCV calls draw on.
        t1: Timestamp taken before the timed work (presumably seconds).
        t2: Timestamp taken after the timed work; FPS is 1 / (t2 - t1).
        color: Color of the box outline and of the label background.
        conf: Detection confidence, either a one-element tensor such as
            ``tensor(0.77406, device='cuda:0')`` or a plain number. When it is
            None the confidence is left out of the text.
        label: Bare class name. It must be exactly 'person'; the combined
            'person 0.74' form does not match and nothing is drawn.
        line_thickness: Outline thickness; a falsy value derives it from the image size.
    """
    # Only draw when a person was detected; every other label is skipped.
    if label != 'person':
        return

    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    tf = max(tl - 1, 1)  # font thickness
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    # Guard against t2 == t1: report 0 FPS rather than raising ZeroDivisionError.
    elapsed = t2 - t1
    video_frame = 1 / elapsed if elapsed > 0 else 0
    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)

    content = label
    if conf is not None:
        # .item() pulls the Python float out of a one-element tensor; plain
        # numbers are used as they are.
        score = conf.item() if hasattr(conf, 'item') else conf
        content += f' {score:.2f}'
    content += f' {int(video_frame)}FPS'

    t_size = cv2.getTextSize(content, 0, fontScale=tl / 3, thickness=tf)[0]
    c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
    cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled
    cv2.putText(im, content, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

你可能感兴趣的:(python,深度学习,计算机视觉,opencv,神经网络)