Convnet: generating batches.meta and other data files

The easiest way to understand the file format is to open the batch files in a Python IDE with a debugger and inspect their contents. Below is a script I wrote for this; read it carefully before using it. Install the PIL package first. If you find a mistake, just leave a comment.
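For example, a minimal sketch of that inspection step, assuming the cuda-convnet CIFAR-10 batches (the cifar-10-py-colmajor package) have been unpacked into ./cifar-10-py-colmajor; the path here is only an illustration:

import cPickle

# load one of the reference batches and look at what it contains
f = open('./cifar-10-py-colmajor/data_batch_1', 'rb')
batch = cPickle.load(f)
f.close()
print batch.keys()                               # e.g. batch_label, labels, data, filenames
print batch['data'].shape, batch['data'].dtype   # one column per image, uint8

# the meta file holds the data mean, label names and sizes
f = open('./cifar-10-py-colmajor/batches.meta', 'rb')
meta = cPickle.load(f)
f.close()
print meta.keys()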

The makeBatch loop at the bottom of the script builds the individual data_batch_1, data_batch_2, ... files.

The final call to main builds the batches.meta file.

This code generates the data_batch files (data_batch_1 through data_batch_6 in the loop below), split into two classes: nopeople and exist_people. The number of outputs of the [fc10] layer in the network configuration file must be changed to 2 accordingly.
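For reference, a layer definition in a cuda-convnet layer-definition file looks roughly like the sketch below; only outputs changes to 2 here, while inputs and initW are placeholders that must match your own configuration:

[fc10]
# two output classes: nopeople / exist_people
type=fc
outputs=2
# inputs and initW are placeholders; keep whatever your existing config uses
inputs=pool3
initW=0.01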

Added 2014-1-9: this script is best run under Linux; files generated under Windows seem to trigger errors when used. (This is most likely a text-mode vs. binary-mode issue; the code below opens the pickle files in binary mode, which should avoid it.)


import os
import cPickle
import numpy as np
from numpy import array
from PIL import Image

def makeBatch(load_path, save_path, data_size):
    data = []
    filenames = []
    class_list = []
    # one line per picture in the label file; line i corresponds to picture i
    class_file = open('train-origin-pics-labels.txt', 'r').readlines()
    file_list = os.listdir(load_path)
    num_sq = save_path[-1]   # last character of save_path is the batch number
    for item in file_list:
        if item.endswith(".jpg"):
            picture_number = item[0:len(item)-4]
            picture_num = int(picture_number)
            class_picture = int(class_file[picture_num-1][10:11])   # label digit assumed at column 10 of the line
            if picture_num % 100 == 0:    # progress indicator
                print picture_number
            n = os.path.join(load_path, item)
            inputImage = Image.open(n)
            (width,height) = inputImage.size
            #if  width > height:
            #    newwidth = width/height*128
            #    small_image = inputImage.resize((newwidth, 128),Image.ANTIALIAS)
            #else:
            #    newheight = height/width*128
            #    small_image = inputImage.resize((128, newheight),Image.ANTIALIAS)
            small_image = inputImage.resize((data_size, data_size), Image.ANTIALIAS)   # square resize, aspect ratio not preserved
            try:
                # store all R values, then all G, then all B (one image per row)
                r, g, b = small_image.split()
                reseqImage = list(r.getdata()) + list(g.getdata()) + list(b.getdata())
                data.append(reseqImage)
                filenames.append(item)
                class_list.append(class_picture)
            except:
                # typically a grayscale or CMYK image that cannot be split into r, g, b
                print 'error ' + picture_number
    data_array = np.array(data, dtype=np.uint8)
    T_data = data_array.T                 # transpose: one column per image
    out_file = open(save_path, 'wb')      # binary mode so the pickle also works on Windows
    dic = {'batch_label': 'batch ' + num_sq + ' of 6', 'data': T_data, 'labels': class_list, 'filenames': filenames}
    cPickle.dump(dic, out_file)
    out_file.close()

def read_batch(batch_path, data_size):
    in_file = open(batch_path, 'rb')      # binary mode, matching how the batches were written
    xx = cPickle.load(in_file)
    in_file.close()
    T_datas = xx['data']
    datas = T_datas.T                     # back to one row per image
    c = np.zeros((1, data_size*data_size*3), dtype=np.float32)
    i = 0
    for data in datas:                    # accumulate a per-pixel sum over the batch
        i += 1
        c = c + data
    return i, c

def add_all(data_size, path):
    count = 0
    totalc = np.zeros((1, data_size*data_size*3), dtype=np.float32)
    for idx in range(1, 7):
        print 'reading batch ' + str(idx)
        batch_path = path + '/data_batch_' + str(idx)   # path to the current batch file
        curcount, curc = read_batch(batch_path, data_size)
        count += curcount
        totalc = totalc + curc

    return count, totalc

def write_data(data_size, path):
    cout, total = add_all(data_size, path)
    a = []
    for i in range(0, len(total[0])):
        c = total[0][i] / cout            # per-pixel mean over all training images
        a.append([c])
    a_array = array(a, dtype=np.float32)  # (num_vis, 1) column vector, stored as data_mean
    return a_array

def main(data_size, path):
    data_mean = write_data(data_size, path)
    label_names = ['nopeople', 'exist_people']
    num1 = 5000                       # images per batch; adjust if your batches are a different size
    num2 = data_size*data_size*3      # inputs per image (num_vis)
    dic = {'data_mean': data_mean, 'label_names': label_names, 'num_cases_per_batch': num1, 'num_vis': num2}
    out_file = open(path + '/batches.meta', 'wb')   # binary mode, same as the batch files
    cPickle.dump(dic, out_file)
    out_file.close()

data_size = 64
out_dir = 'baidu_data_size_' + str(data_size)
if not os.path.exists(out_dir):       # make sure the output directory exists
    os.makedirs(out_dir)
for i in range(1, 7):
    makeBatch('./train-origin-pics-part' + str(i), out_dir + '/data_batch_' + str(i), data_size)
main(data_size, out_dir)
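After the script finishes, a quick sanity check along these lines can confirm that the generated files have the layout described above (the directory name matches the driver code, and the dictionary keys are the ones written by makeBatch and main):

import cPickle

out_dir = 'baidu_data_size_64'

# each batch stores one image per column: all R values, then all G, then all B
f = open(out_dir + '/data_batch_1', 'rb')
batch = cPickle.load(f)
f.close()
print batch['data'].shape, batch['data'].dtype   # (64*64*3, number_of_images), uint8
print batch['labels'][:10], batch['filenames'][:3]

# batches.meta holds the per-pixel mean and the label names
f = open(out_dir + '/batches.meta', 'rb')
meta = cPickle.load(f)
f.close()
print meta['data_mean'].shape                    # (64*64*3, 1)
print meta['label_names'], meta['num_vis']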



