最简单的方法是找一个支持调试的 Python IDE,读取各个文件之后查看其中的内容。这里有一个我写的脚本,仔细阅读之后就可以使用了。使用前需要先安装 PIL 包。如果发现错误,请在评论中指出。
第93、94行(即脚本末尾调用 makeBatch 的 for 循环)用于生成 data_batch_1 等各个批次文件。
最后一行(调用 main)用于生成 batches.meta 文件。
这段代码生成6个 data_batch 文件(data_batch_1 到 data_batch_6),样本分为两类:一类为 nopeople,一类为 exist_people。需要把网络配置文件中 [fc10] 层的 outputs 个数改为2。
14-1-9补充:这个程序最好在 Linux 下使用,在 Windows 下生成的文件在使用时似乎会报错。
import os
try:
    import cPickle
except ImportError:  # Python 3 has no cPickle module; pickle replaces it
    import pickle as cPickle
import pickle

import numpy as np
from numpy import array, append
try:
    from PIL import Image
except ImportError:  # legacy PIL installs only expose a top-level module
    import Image

# Number of data_batch_<i> files produced and later read back.
NUM_BATCHES = 6
# Protocol 2 is the newest pickle protocol that Python 2 can still read.
PICKLE_PROTOCOL = 2
# Text file holding one label line per picture number.
LABELS_FILE = 'train-origin-pics-labels.txt'

# Newer Pillow releases name this filter LANCZOS; older PIL/Pillow releases
# only know it as ANTIALIAS.
_RESAMPLE = getattr(Image, 'LANCZOS', None)
if _RESAMPLE is None:
    _RESAMPLE = Image.ANTIALIAS


def makeBatch(load_path, save_path, data_size):
    """Pack every ``.jpg`` in *load_path* into one pickled batch file.

    Each image is resized to ``data_size x data_size`` and flattened as all
    red values, then all green values, then all blue values.  The batch is a
    dict with the keys ``batch_label``, ``data`` (uint8 array with one column
    per image), ``labels`` and ``filenames``.

    Args:
        load_path: Directory containing files named ``<number>.jpg``.
        save_path: Destination file; its last character is used as the batch
            number in ``batch_label``.
        data_size: Edge length in pixels of the resized square image.

    Images that do not split into exactly three bands are reported with an
    ``error<number>`` message and skipped.
    """
    with open(LABELS_FILE, 'r') as label_file:
        label_lines = label_file.readlines()

    data = []
    filenames = []
    class_list = []
    for item in os.listdir(load_path):
        if not item.endswith(".jpg"):
            continue
        picture_number = item[:-4]
        picture_num = int(picture_number)
        # The label is the single character at column 10 of the line whose
        # 1-based index equals the picture number.
        # NOTE(review): assumes a fixed-width labels file -- confirm.
        class_picture = label_lines[picture_num - 1][10:11]
        if picture_num % 100 == 0:
            print(picture_number)  # progress indicator

        input_image = Image.open(os.path.join(load_path, item))
        small_image = input_image.resize((data_size, data_size), _RESAMPLE)
        try:
            r, g, b = small_image.split()
        except ValueError:
            # Not a three-band image (e.g. greyscale): skip it.
            print('error' + picture_number)
            continue
        data.append(list(r.getdata()) + list(g.getdata()) + list(b.getdata()))
        filenames.append(item)
        class_list.append(class_picture)

    # The reshape keeps the array two-dimensional even when no image was
    # accepted, so readers can always transpose and iterate it.
    data_array = np.array(data, dtype=np.uint8).reshape(
        -1, data_size * data_size * 3)
    dic = {
        'batch_label': 'batch ' + save_path[-1] + ' of ' + str(NUM_BATCHES),
        'data': data_array.T,
        'labels': class_list,
        'filenames': filenames,
    }

    out_dir = os.path.dirname(save_path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # Pickles must be written in binary mode; text mode rewrites newline
    # bytes on Windows and corrupts the file.
    with open(save_path, 'wb') as out_file:
        pickle.dump(dic, out_file, PICKLE_PROTOCOL)


def read_batch(batch_path, data_size):
    """Return ``(image_count, pixel_sum)`` for one batch file.

    ``pixel_sum`` is a float32 array of shape ``(1, data_size*data_size*3)``
    holding the per-position sum over all images of the batch.
    """
    # Unpickling executes arbitrary code: only load batch files produced by
    # makeBatch, never files from an untrusted source.
    with open(batch_path, 'rb') as in_file:
        batch = cPickle.load(in_file)

    images = batch['data'].T  # back to one row per image
    pixel_sum = np.zeros((1, data_size * data_size * 3), dtype=np.float32)
    if len(images):
        # Accumulate in float64, then store into the float32 result.
        pixel_sum += images.sum(axis=0, dtype=np.float64)
    return len(images), pixel_sum


def add_all(data_size, path, num_batches=NUM_BATCHES):
    """Sum image counts and pixel sums over ``data_batch_1..num_batches``.

    Args:
        data_size: Edge length in pixels of the stored images.
        path: Directory containing the ``data_batch_<i>`` files.
        num_batches: How many batch files to read, starting at 1.

    Returns:
        ``(count, totalc)`` where ``totalc`` has shape
        ``(1, data_size*data_size*3)`` and dtype float32.
    """
    count = 0
    totalc = np.zeros((1, data_size * data_size * 3), dtype=np.float32)
    for idx in range(1, num_batches + 1):
        print('reading batch' + str(idx))
        # Build the file name from the unchanged directory on every pass;
        # appending to `path` itself would nest the names of earlier batches.
        batch_path = path + '/data_batch_' + str(idx)
        curcount, curc = read_batch(batch_path, data_size)
        count += curcount
        totalc = totalc + curc
    return count, totalc


def write_data(data_size, path):
    """Return the mean image as a float32 column of shape ``(N, 1)``.

    ``N`` is ``data_size*data_size*3``.

    Raises:
        ValueError: If the batches under *path* contain no images.
    """
    count, total = add_all(data_size, path)
    if count == 0:
        raise ValueError('no images found in the batches under ' + path)
    return (total[0] / count).reshape(-1, 1).astype(np.float32)


def main(data_size, path):
    """Write ``<path>/batches.meta`` describing the generated batches."""
    dic = {
        'data_mean': write_data(data_size, path),
        'label_names': ['nopeople', 'exist_people'],
        # NOTE(review): fixed value, not derived from the batches -- confirm
        # it matches the real number of images per batch.
        'num_cases_per_batch': 5000,
        'num_vis': data_size * data_size * 3,
    }
    with open(path + '/batches.meta', 'wb') as out_file:
        cPickle.dump(dic, out_file, PICKLE_PROTOCOL)


data_size = 64
for i in range(1, NUM_BATCHES + 1):
    makeBatch('./train-origin-pics-part' + str(i),
              'baidu_data_size_' + str(data_size) + '/data_batch_' + str(i),
              data_size)
main(data_size, 'baidu_data_size_' + str(data_size))