数据清洗操作步骤

1、txt第一行删掉

2、txt中null替换为0,0,0,0,0,0(之后会删除这些帧)

3、运行data_fixation.py

#txt文件名改为数字,删第一行,将null替换为0,0,0,0,0,0,输入此程序,可获得每帧fixation点的重心
import os
import numpy as np
from numpy import *

path = "./test/"  # input directory: one cleaned gaze-log txt per recording
path1 = "C:/data/hero_fixation/482/"  # output directory: one txt per frame


def fixation_centroid(items):
    """Return the mean (x, y) of the fixation points in one CSV row.

    ``items`` is a row already split on commas.  Columns 0-5 are not
    coordinates (column 0 is the frame name); from column 6 onward the
    values alternate x, y, x, y, ...  A trailing value without a partner
    (for example the empty field left by a trailing comma) is ignored.

    Returns:
        ``(mean_x, mean_y)`` as floats, or ``None`` when the row holds no
        complete (x, y) pair (blank line, truncated row).

    Raises:
        ValueError: if a coordinate field is not a valid number.
    """
    coords = items[6:]
    pair_count = len(coords) // 2
    if pair_count == 0:
        return None
    xs = [float(value) for value in coords[0:2 * pair_count:2]]
    ys = [float(value) for value in coords[1:2 * pair_count:2]]
    # float() keeps the result a plain Python float even when ``sum`` has
    # been replaced by numpy's version through a star import.
    return float(sum(xs)) / pair_count, float(sum(ys)) / pair_count


def main():
    """Write the fixation centroid of every frame to ``path1/<frame>.txt``."""
    os.makedirs(path1, exist_ok=True)
    for file_name in sorted(os.listdir(path)):
        position = os.path.join(path, file_name)
        if not os.path.isfile(position):
            continue  # os.listdir also returns sub-directories
        print(position)

        with open(position, 'r') as f:
            for line in f:
                items = line.strip().split(',')
                centroid = fixation_centroid(items)
                if centroid is None:
                    continue  # blank or truncated row: nothing to average
                nx, ny = centroid
                # The first column is the frame name and names the output file.
                position1 = os.path.join(path1, items[0] + '.txt')
                with open(position1, 'a') as f1:
                    # The newline keeps records apart if the script is re-run
                    # (the file is opened in append mode).
                    f1.write(str(nx) + "," + str(ny) + "\n")


if __name__ == "__main__":
    main()

获得以第一列字符串命名的txt文件,txt中存放此帧fixation点的重心。其中,test中为输入txt,data中为输出txt集。

4、运行data_action.py

#txt文件名改为数字,删第一行,将null替换为0,0,0,0,0,0,输入此程序,可获得每帧action
import os
path = "./test/"  # input directory: one cleaned gaze-log txt per recording
path1 = "F:/ROII/cnn_yolov5/data/hero_action/train/482_RZ_3472304_Jul-18-12-52-35/"  # output directory: one txt per frame


def parse_action(items):
    """Return ``(frame_name, action)`` for one CSV row split on commas.

    Column 0 is the frame name and column 5 is parsed as the integer action.

    Returns:
        A ``(str, int)`` tuple, or ``None`` when the row has fewer than six
        columns (blank line, truncated row).

    Raises:
        ValueError: if column 5 is not a valid integer.
    """
    if len(items) < 6:
        return None
    return items[0], int(items[5])


def main():
    """Write the action of every frame to ``path1/<frame>.txt``."""
    os.makedirs(path1, exist_ok=True)
    for file_name in sorted(os.listdir(path)):
        position = os.path.join(path, file_name)
        if not os.path.isfile(position):
            continue  # os.listdir also returns sub-directories
        print(position)

        with open(position, 'r') as f:
            for line in f:
                parsed = parse_action(line.strip().split(','))
                if parsed is None:
                    continue  # blank or truncated row carries no action
                frame_name, action = parsed
                position1 = os.path.join(path1, frame_name + '.txt')
                with open(position1, 'a') as f1:
                    f1.write(str(action) + '\n')


if __name__ == "__main__":
    main()

获得以第一列字符串命名的txt文件,txt中存放此帧action值。其中,test中为输入txt,data中为输出txt集。

数据清洗操作步骤_第1张图片

5、删除不存在fixation及fixation超出边界的帧

复制经过步骤2处理后的txt文件,

运行data_delete_null.py

#data_delete_null.py
#txt文件名改为数字,删第一行,将null替换为0,0,0,0,0,0,输入此程序,可获得重心为0的帧的名称,再次输入data_null_bat.py可生成删除的bat命令
import os
import numpy as np
from numpy import *

path = "./test/"  # input directory: one cleaned gaze-log txt per recording
path1 = "./Anullout/"  # output directory: one list of rejected frames per input file

MAX_X = 160  # a centroid x above this value is rejected
MAX_Y = 210  # a centroid y above this value is rejected


def fixation_centroid(items):
    """Return the mean (x, y) of the fixation points in one CSV row.

    ``items`` is a row already split on commas.  Columns 0-5 are not
    coordinates (column 0 is the frame name); from column 6 onward the
    values alternate x, y, x, y, ...  A trailing value without a partner
    is ignored.

    Returns:
        ``(mean_x, mean_y)`` as floats, or ``None`` when the row holds no
        complete (x, y) pair.

    Raises:
        ValueError: if a coordinate field is not a valid number.
    """
    coords = items[6:]
    pair_count = len(coords) // 2
    if pair_count == 0:
        return None
    xs = [float(value) for value in coords[0:2 * pair_count:2]]
    ys = [float(value) for value in coords[1:2 * pair_count:2]]
    # float() keeps the result a plain Python float even when ``sum`` has
    # been replaced by numpy's version through a star import.
    return float(sum(xs)) / pair_count, float(sum(ys)) / pair_count


def frame_is_invalid(items):
    """Return True when the frame described by ``items`` must be deleted.

    A frame is rejected when it has no fixation pair at all, when either
    centroid coordinate is exactly 0 (the all-zero rows that replaced
    ``null``), or when the centroid exceeds ``MAX_X`` / ``MAX_Y``.
    """
    centroid = fixation_centroid(items)
    if centroid is None:
        return True
    nx, ny = centroid
    # NOTE(review): negative centroids are not rejected, matching the
    # original thresholds - confirm whether they can occur in the data.
    return nx == 0 or ny == 0 or nx > MAX_X or ny > MAX_Y


def main():
    """List the rejected frame names of each input file in ``path1``."""
    os.makedirs(path1, exist_ok=True)
    for file_name in sorted(os.listdir(path)):
        position = os.path.join(path, file_name)
        if not os.path.isfile(position):
            continue  # os.listdir also returns sub-directories
        print(position)
        print(file_name)

        rejected = []
        with open(position, 'r') as f:
            for line in f:
                row = line.strip()
                if not row:
                    continue  # blank line: no frame name to report
                items = row.split(',')
                if frame_is_invalid(items):
                    rejected.append(items[0])

        # Only touch the output file when there is something to report, so
        # clean recordings do not leave empty list files behind.
        if rejected:
            position1 = os.path.join(path1, file_name)
            with open(position1, 'a') as f1:
                f1.writelines(name + "\n" for name in rejected)


if __name__ == "__main__":
    main()

生成待剔除帧的编号列表;将输出文件名改为数字后,输入data_null_bat.py即可转换为bat删除命令

#data_null_bat.py
#将data_delete_null.py生成的txt输入data_null_bat.py,可生成删除对应帧的bat命令
import os
import re
import sys
from numpy import *

path = "./Anullout/"  # input directory: lists of rejected frame numbers
path1 = "./Anullout/bat/"  # output directory: one file of del commands per list


def delete_command(frame_id):
    """Return the ``del`` command that removes the PNG of one frame.

    ``frame_id`` is one line of a rejected-frame list; surrounding
    whitespace and the line ending are stripped before the id is
    zero-padded to seven digits (``12`` -> ``del 0000012.png``).
    """
    return 'del' + ' ' + frame_id.strip().zfill(7) + '.png'


def main():
    """Turn every rejected-frame list in ``path`` into a file of del commands."""
    os.makedirs(path1, exist_ok=True)
    for file_name in sorted(os.listdir(path)):
        position = os.path.join(path, file_name)
        # The output directory lives inside the input directory, so the
        # listing contains it; only regular files are frame lists.
        if not os.path.isfile(position):
            continue
        print(position)
        print(file_name)

        with open(position, 'r') as f:
            # Blank lines would otherwise become "del 0000000.png".
            commands = [delete_command(line) for line in f if line.strip()]

        if commands:
            position1 = os.path.join(path1, file_name)
            with open(position1, 'a') as f1:
                f1.writelines(command + "\n" for command in commands)


if __name__ == "__main__":
    main()

你可能感兴趣的:(python,开发语言)