实现xml文件随机划分的python脚本

实习的时候,主要是做物体检测,需要将XML文件划分进训练集(train)、验证集(val)和测试集(test)。

于是mentor给了个任务,要求写一个python脚本实现对XML文件的随机划分,具体要求如下:

1.新建两个文件夹test和trainval,其中test文件夹中保留划分进测试集的XML文件,并在其中新建一个txt文档记录文件名;trainval文件夹中保留划分进训练集和验证集的XML文件,并在其中新建一个txt文档记录文件名。

2.在trainval文件夹中新建两个文件夹train和val,其中train文件夹中保留trainval文件夹中被划分进训练集的XML文件,并在其中新建一个txt文档记录文件名;val文件夹中保留trainval文件夹中被划分进验证集的XML文件,并在其中新建一个txt文档记录文件名。

3.要求对文件进行随机划分。

4.要求test文件夹中的文件数目为总文件数目的1/2,train文件夹中的文件数目为总文件数目的1/4,val文件夹中的文件数目为总文件数目的1/4。

即,总文件 = test文件夹中的文件 + trainval文件夹中的文件,二者各占1/2。

其中,trainval文件夹中的文件 = train文件夹中的文件 + val文件夹中的文件,二者各占1/2。

用了两个小时完成该脚本代码,脚本代码如下:

#coding:utf-8
import os
import random
import shutil
import sys
import getopt

def _split_in_half(names):
  """Randomly split *names* into two lists.

  The first list holds len(names) // 2 randomly chosen names (in pick
  order); the second holds the remaining names in their original order.
  Works for empty and single-element input.
  """
  picked = random.sample(names, len(names) // 2)
  picked_set = set(picked)
  remaining = [name for name in names if name not in picked_set]
  return picked, remaining


def _copy_and_index(names, src_dir, dst_dir, index_name):
  """Copy each file in *names* from src_dir into dst_dir and append its
  name (without extension) to the index file dst_dir/index_name.
  """
  # Append mode is kept from the original script, so re-running on the same
  # directory adds to an existing index rather than replacing it.
  with open(os.path.join(dst_dir, index_name), "a") as index_file:
    for name in names:
      shutil.copy(os.path.join(src_dir, name), dst_dir)
      index_file.write(os.path.splitext(name)[0] + "\n")


def main(argv):
  """Randomly split the XML files of a directory into test/train/val sets.

  Layout produced under the target directory:
    test/               half of the XML files (rounded down) + test.txt
    trainval/           the remaining XML files + trainval.txt
    trainval/train/     half of trainval (rounded down) + train.txt
    trainval/val/       the rest of trainval + val.txt
  Each .txt index lists the copied file names without the .xml extension.
  The original files in the target directory are left in place.

  Args:
    argv: command-line arguments without the program name. Accepts
      -h, -d <dir>, --dir=<dir> and --targetDir=<dir>.

  Raises:
    SystemExit: on -h, on an invalid option, or when no directory is given.
  """
  try:
    # Both long spellings are accepted: the help text advertises
    # --targetDir while the original parser only declared --dir.
    opts, _ = getopt.getopt(argv, "hd:", ["dir=", "targetDir="])
  except getopt.GetoptError:
    print('Error: fileSeperate.py -d <dir>')
    print('   or: fileSeperate.py --targetDir=<dir>')
    sys.exit(2)

  root = None
  for opt, arg in opts:
    if opt == "-h":
      print("fileSeperate.py -d <dir>")
      print("or:fileSeperate.py --targetDir=<dir>")
      sys.exit()
    elif opt in ("-d", "--dir", "--targetDir"):
      root = arg

  if root is None:
    # Without a target directory there is nothing to split.
    print('Error: fileSeperate.py -d <dir>')
    print('   or: fileSeperate.py --targetDir=<dir>')
    sys.exit(2)

  # Only regular files ending in ".xml" take part in the split; quotas are
  # derived from this list so unrelated files cannot skew them.
  all_xml_list = [
      name for name in os.listdir(root)
      if name.endswith(".xml") and os.path.isfile(os.path.join(root, name))
  ]

  # Create the output directories under root (not the current directory).
  trainval = os.path.join(root, "trainval")
  test = os.path.join(root, "test")
  train = os.path.join(trainval, "train")
  val = os.path.join(trainval, "val")
  for directory in (trainval, test, train, val):
    if not os.path.isdir(directory):
      os.makedirs(directory)

  # First level: test vs. trainval.
  test_list, trainval_list = _split_in_half(all_xml_list)
  _copy_and_index(test_list, root, test, "test.txt")
  _copy_and_index(trainval_list, root, trainval, "trainval.txt")

  # Second level: train vs. val, taken from the trainval copies.
  train_list, val_list = _split_in_half(trainval_list)
  _copy_and_index(train_list, trainval, train, "train.txt")
  _copy_and_index(val_list, trainval, val, "val.txt")


# Script entry point: hand the command-line arguments (without the program
# name) to main() only when this file is executed directly.
if "__main__" == __name__:
  cli_args = sys.argv[1:]
  main(cli_args)


 
  

你可能感兴趣的:(python)