解压后就是分类数据集的标准目录格式:二级目录名为人类活动类别,二级目录下就是对应的视频数据。每个视频长度为 4 s,分辨率 320×240,帧率 25 Hz。
需要注意:同一类活动下,有些视频是截取自同一个长视频的不同片段,即视频中的人物和背景等特征基本相似。为避免这类视频被分别划分到 train 和 test 集合、导致评估效果虚高而不符合实际,UCF 官方提供了标准的 train 和 test 集合划分列表文件。
import os
import os.path
def get_train_test_lists(version='01'):
    """Read one pair of train/test list files and return their entries.

    Using one of the train/test files (01, 02, or 03), get the filename
    breakdowns we'll later use to move everything.

    Args:
        version: Suffix inserted into the list file names, i.e.
            'ucfTrainTestlist/testlist<version>.txt' and
            'ucfTrainTestlist/trainlist<version>.txt' (relative to the
            current working directory).

    Returns:
        A dict with two keys, 'train' and 'test', each mapping to a list
        of the entries read from the corresponding file. For train
        entries only the first whitespace-separated field is kept.

    Raises:
        OSError: If either list file cannot be opened.
    """
    # Get our files based on version.
    test_file = os.path.join('ucfTrainTestlist', 'testlist' + version + '.txt')
    train_file = os.path.join('ucfTrainTestlist', 'trainlist' + version + '.txt')

    # Build the test list. Blank lines are skipped so that callers never
    # receive an empty entry.
    with open(test_file) as fin:
        test_list = [row.strip() for row in fin if row.strip()]

    # Build the train list. Extra step to remove the class index that
    # follows the path on each line.
    with open(train_file) as fin:
        train_list = [row.split()[0] for row in fin if row.strip()]

    # Set the groups in a dictionary.
    file_groups = {
        'train': train_list,
        'test': test_list,
    }

    return file_groups
def move_files(file_groups):
    """Move every listed video from 'UCF-101/<class>/' to '<group>/<class>/'.

    This assumes the videos currently live under the 'UCF-101' folder in
    the current working directory. Only needs to happen once: entries
    whose source file is no longer there are reported and skipped.

    Args:
        file_groups: Dict mapping a group name (used as the destination
            top-level folder, e.g. 'train' or 'test') to a list of
            'classname/filename' entries.
    """
    # Do each of our groups.
    for group, videos in file_groups.items():

        # Do each of our videos.
        for video in videos:

            # Get the parts. Entries are split on '/' rather than
            # os.path.sep (presumably because the list files use '/'
            # on every platform).
            parts = video.split('/')
            classname = parts[0]
            filename = parts[1]

            # Check if this class exists.
            class_dir = os.path.join(group, classname)
            if not os.path.exists(class_dir):
                print("Creating folder for %s/%s" % (group, classname))
                os.makedirs(class_dir)

            # Check if we have already moved this file, or at least that it
            # exists to move.
            filename_input = os.path.join('UCF-101', classname, filename)
            if not os.path.exists(filename_input):
                print("Can't find %s to move. Skipping." % (filename))
                # Without this, os.rename below would raise on the
                # missing source instead of skipping it.
                continue

            # Move it.
            dest = os.path.join(group, classname, filename)
            print("Moving %s to %s" % (filename, dest))
            os.rename(filename_input, dest)
def main():
    """Go through each of our train/test text files and move the videos
    to the right place.
    """
    # Get the videos in groups so we can move them.
    group_lists = get_train_test_lists()

    # Move the files.
    move_files(group_lists)


if __name__ == '__main__':
    main()
如果运行时报错:'ffmpeg' 不是内部或外部命令,也不是可运行的程序,说明系统找不到 ffmpeg 可执行文件。
解决办法:将 call(["ffmpeg", "-i", src, dest]) 替换为 call(["path of ffmpeg.exe", "-i", src, dest]),其中 "path of ffmpeg.exe" 改为本机 ffmpeg.exe 的完整路径。
"""
After moving all the files using the 1_ file, we run this one to extract
the images from the videos and also create a data file we can use
for training and testing later.
"""
import csv
import glob
import os
import os.path
from subprocess import call
def extract_files(ffmpeg_bin='ffmpeg'):
    """Extract frames from every video and write 'data_file.csv'.

    After we have all of our videos split between train and test, and
    all nested within folders representing their classes, we need to
    make a data file that we can reference when training our RNN(s).
    This will let us keep track of image sequences and other parts
    of the training process.

    We'll first need to extract images from each of the videos. We'll
    need to record the following data in the file:

    [train|test], class, filename, nb frames

    Extracting can be done with ffmpeg:
    `ffmpeg -i video.mpg image-%04d.jpg`

    Args:
        ffmpeg_bin: Name or full path of the ffmpeg executable, used as
            the first element of the command passed to subprocess.call.
            Override it when 'ffmpeg' is not on the PATH.
    """
    data_file = []
    folders = ['train', 'test']

    for folder in folders:
        class_folders = glob.glob(os.path.join(folder, '*'))

        for vid_class in class_folders:
            class_files = glob.glob(os.path.join(vid_class, '*.avi'))

            for video_path in class_files:
                # Get the parts of the file.
                video_parts = get_video_parts(video_path)

                train_or_test, classname, filename_no_ext, filename = video_parts

                # Only extract if we haven't done it yet. Otherwise, just get
                # the info.
                if not check_already_extracted(video_parts):
                    # Now extract it.
                    src = os.path.join(train_or_test, classname, filename)
                    dest = os.path.join(train_or_test, classname,
                                        filename_no_ext + '-%04d.jpg')
                    call([ffmpeg_bin, "-i", src, dest])

                # Now get how many frames it is.
                nb_frames = get_nb_frames_for_video(video_parts)

                data_file.append([train_or_test, classname, filename_no_ext, nb_frames])

                print("Generated %d frames for %s" % (nb_frames, filename_no_ext))

    # newline='' lets the csv module control line endings itself, as the
    # csv documentation recommends for files passed to csv.writer.
    with open('data_file.csv', 'w', newline='') as fout:
        writer = csv.writer(fout)
        # Persist the collected rows; creating the writer alone writes
        # nothing.
        writer.writerows(data_file)

    print("Extracted and wrote %d video files." % (len(data_file)))
def get_nb_frames_for_video(video_parts):
    """Given video parts of an (assumed) already extracted video, return
    the number of frames that were extracted.

    Frames are looked up as '<group>/<class>/<name>-NNNN*.jpg', i.e. the
    video name followed by a dash and at least four digits. Matching on
    the dash and digits keeps frames of another video whose name merely
    starts with the same prefix (e.g. 'clip' vs 'clip2') out of the count.

    Args:
        video_parts: 4-tuple (train_or_test, classname, filename_no_ext,
            filename); the last element is not used here.

    Returns:
        The number of matching .jpg files (0 if none exist).
    """
    train_or_test, classname, filename_no_ext, _ = video_parts
    frame_prefix = os.path.join(train_or_test, classname, filename_no_ext)
    # Escape the fixed part so characters such as '[' or '*' in a folder
    # or file name are matched literally rather than as glob syntax.
    pattern = glob.escape(frame_prefix) + '-[0-9][0-9][0-9][0-9]*.jpg'
    return len(glob.glob(pattern))
def get_video_parts(video_path):
    """Given a path to a video, return its parts.

    The path is split on os.path.sep and its first three components are
    taken as '<train_or_test>/<classname>/<filename>'.

    Args:
        video_path: Path of the form described above.

    Returns:
        A tuple (train_or_test, classname, filename_no_ext, filename),
        where filename_no_ext is filename with its final extension
        removed.

    Raises:
        IndexError: If the path has fewer than three components.
    """
    parts = video_path.split(os.path.sep)
    train_or_test = parts[0]
    classname = parts[1]
    filename = parts[2]
    # splitext strips only the last extension, so a name that itself
    # contains dots (e.g. 'clip.v2.avi') is not cut at the first dot.
    filename_no_ext = os.path.splitext(filename)[0]

    return train_or_test, classname, filename_no_ext, filename
def check_already_extracted(video_parts):
    """Tell whether the '-0001' frame of this video already exists on disk.

    Args:
        video_parts: 4-tuple (train_or_test, classname, filename_no_ext,
            filename); the last element is ignored.

    Returns:
        True if '<train_or_test>/<classname>/<filename_no_ext>-0001.jpg'
        exists, otherwise False.
    """
    group, label, stem, _ = video_parts
    first_frame = os.path.join(group, label, stem + '-0001.jpg')
    return os.path.exists(first_frame)
def main():
    """Extract images from videos and build a new file that we
    can use as our data input file. It can have format:

    [train|test], class, filename, nb frames
    """
    extract_files()


if __name__ == '__main__':
    main()