The simplest way is to open the files in a Python IDE with a debugger, load each one and inspect its contents. Below is a script I wrote for this; read it carefully and it should be ready to use. Install the PIL package beforehand. If you find a mistake, just leave a comment.
The loop at the bottom of the script builds the individual files data_batch_1 through data_batch_6.
The final call to main() builds the batches.meta file.
The script generates six data_batch files plus batches.meta (seven files in total), covering two classes: nopeople and exist_people. You also need to change the outputs of the [fc10] layer in the network configuration file to 2.
Update (2014-1-9): this script is best run under Linux; files generated under Windows seem to throw errors when used (most likely a text- versus binary-mode issue when the pickle files are written).
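The listing below also assumes a label file train-origin-pics-labels.txt in which the 0/1 class character sits at a fixed column; that is what the [10:11] slice in makeBatch picks out. The line layout shown here is only a hypothetical example of such a file, so adjust the slice to wherever the label actually sits in your own file:

# Hypothetical label-file line: "<5-digit image number>.jpg <label>".
# The real layout depends on how your label file was exported.
line = '00001.jpg 1'
print line[10:11]   # -> '1', the character makeBatch stores as the class label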
import os
import cPickle
import numpy as np
from PIL import Image


def makeBatch(load_path, save_path, data_size):
    # Pack every .jpg under load_path into one cuda-convnet style data_batch file.
    data = []
    filenames = []
    class_list = []
    class_file = open('train-origin-pics-labels.txt', 'rb').readlines()
    file_list = os.listdir(load_path)
    num_sq = save_path[-1]  # batch number, taken from the last character of save_path
    for item in file_list:
        if item.endswith(".jpg"):
            picture_number = item[:-4]
            picture_num = int(picture_number)
            # the 0/1 class character sits at a fixed column of the label file
            class_picture = class_file[picture_num - 1][10:11]
            if picture_num % 100 == 0:
                print picture_number
            n = os.path.join(load_path, item)
            inputImage = Image.open(n)
            small_image = inputImage.resize((data_size, data_size), Image.ANTIALIAS)
            try:
                r, g, b = small_image.split()
                # store pixels channel-planar: all R values, then all G, then all B
                reseqImage = list(r.getdata()) + list(g.getdata()) + list(b.getdata())
                data.append(reseqImage)
                filenames.append(item)
                class_list.append(class_picture)
            except:
                print 'error ' + picture_number
    data_array = np.array(data, dtype=np.uint8)
    T_data = data_array.T  # one column per image, as the CIFAR-style batches expect
    out_file = open(save_path, 'wb')
    dic = {'batch_label': 'batch ' + num_sq + ' of 6',
           'data': T_data,
           'labels': class_list,
           'filenames': filenames}
    cPickle.dump(dic, out_file)
    out_file.close()


def read_batch(batch_path, data_size):
    # Return the number of images in one batch and the per-pixel sums.
    in_file = open(batch_path, 'rb')
    xx = cPickle.load(in_file)
    in_file.close()
    datas = xx['data'].T
    c = np.zeros((1, data_size * data_size * 3), dtype=np.float32)
    i = 0
    for data in datas:
        i += 1
        c = c + data
    return i, c


def add_all(data_size, path):
    # Accumulate image count and pixel sums over all six batches.
    count = 0
    totalc = np.zeros((1, data_size * data_size * 3), dtype=np.float32)
    for idx in range(1, 7):
        print 'reading batch ' + str(idx)
        batch_path = path + '/data_batch_' + str(idx)
        curcount, curc = read_batch(batch_path, data_size)
        count += curcount
        totalc = totalc + curc
    return count, totalc


def write_data(data_size, path):
    # Turn the pixel sums into the per-pixel mean, shaped num_vis x 1.
    count, total = add_all(data_size, path)
    a = []
    for i in range(0, len(total[0])):
        a.append([total[0][i] / count])
    return np.array(a, dtype=np.float32)


def main(data_size, path):
    # Build batches.meta: mean image, label names and batch geometry.
    data_mean = write_data(data_size, path)
    label_names = ['nopeople', 'exist_people']
    num1 = 5000                       # images per batch
    num2 = data_size * data_size * 3  # values per image (num_vis)
    dic = {'data_mean': data_mean,
           'label_names': label_names,
           'num_cases_per_batch': num1,
           'num_vis': num2}
    out_file = open(path + '/batches.meta', 'wb')
    cPickle.dump(dic, out_file)
    out_file.close()


data_size = 64
for i in range(1, 7):
    makeBatch('./train-origin-pics-part' + str(i),
              'baidu_data_size_' + str(data_size) + '/data_batch_' + str(i),
              data_size)
main(data_size, 'baidu_data_size_' + str(data_size))
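As suggested at the top, the easiest sanity check is simply to unpickle the generated files and look at what is inside. Here is a minimal sketch of that, assuming the script above has already been run with data_size = 64 so the output sits under baidu_data_size_64:

import cPickle

path = 'baidu_data_size_64'

# One data batch: 'data' should be uint8 with shape (64*64*3, num_images).
with open(path + '/data_batch_1', 'rb') as f:
    batch = cPickle.load(f)
print batch.keys()
print batch['data'].shape, batch['data'].dtype
print batch['labels'][:10], batch['filenames'][:10]

# The meta file: 'data_mean' should be float32 with shape (64*64*3, 1).
with open(path + '/batches.meta', 'rb') as f:
    meta = cPickle.load(f)
print meta['label_names'], meta['num_vis'], meta['num_cases_per_batch']
print meta['data_mean'].shape, meta['data_mean'].dtype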
Reposted from: http://blog.csdn.net/xuanwu_yan/article/details/16948385