数据集的准备
# coding:utf-8
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import cv2
import os
def to_categorical(y, num_classes=None):
    """Convert a vector of integer class labels into a one-hot matrix.

    E.g. for use with categorical_crossentropy.

    # Arguments
        y: class labels to convert (non-negative integers, each smaller
            than `num_classes`). Values are cast to `int` first.
        num_classes: total number of classes. When falsy (`None` or 0)
            it is inferred as `max(y) + 1`.

    # Returns
        A float array of shape `y.shape + (num_classes,)` holding a single
        1 per label. If `y` has more than one axis and its last axis has
        length 1, that axis is dropped before `num_classes` is appended.

    # Raises
        ValueError: if `y` contains a negative label.
    """
    y = np.array(y, dtype='int')
    input_shape = y.shape
    # A trailing singleton axis (column vector of labels) is not kept in
    # the output shape.
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    # Negative values would be accepted by NumPy indexing and silently wrap
    # around to the last columns, marking the wrong class.
    if y.size and y.min() < 0:
        raise ValueError('class labels must be non-negative integers')
    if not num_classes:
        num_classes = np.max(y) + 1
    n = y.shape[0]
    categorical = np.zeros((n, num_classes))
    # Row k gets a 1 in column y[k].
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes,)
    categorical = np.reshape(categorical, output_shape)
    return categorical
# Load images: every sub-directory of `path` is one class, and the
# directory's position in the sorted listing is its integer label.
X = []  # flattened image vectors
Y = []  # integer class index of each image
path = "/home/tom/workspace/PycharmProjects/logic_regression/dataset"
# os.listdir() returns entries in arbitrary order; sort so the
# folder -> label mapping is identical on every run, and ignore stray
# non-directory entries that cannot hold images.
labels = sorted(
    name for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
)
for i, label in enumerate(labels):
    datas_path = os.path.join(path, label)
    # Sorted as well, so the seeded train/test split below is reproducible.
    for data in sorted(os.listdir(datas_path)):
        img_path = os.path.join(datas_path, data)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; fail here with the offending path rather than
            # inside cv2.resize.
            raise ValueError("cannot read image file: " + img_path)
        img = cv2.resize(img, (60, 60))
        img = img.reshape(60 * 60 * 3)  # flatten the 3-D array (10800 values)
        X.append(img)
        Y.append(i)
X = np.array(X).astype(np.float32) / 255  # list -> array, then normalize
# One-hot encode the labels; the class count is fixed at 2 here.
Y = to_categorical(Y, 2)
# Hold out 30% of the samples for testing; fixed seed for a repeatable split.
X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
# How to build one-hot labels by hand
import numpy as np

a = np.zeros(3)  # three samples labelled 0
b = np.ones(3)  # three samples labelled 1
c = 2 * b  # three samples labelled 2
# Chain all label groups (a, b, c) together and cast them to integers so
# they can be used as column indices.
d = np.concatenate([a, b, c]).astype('int')
# Start from an all-zero mask with one row per sample and one column per class.
x = np.zeros((d.size, 3))
# Put a 1 at (row k, column d[k]) to obtain the final one-hot labels.
x[np.arange(d.size), d] = 1
x
Out[13]:
array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])
csv数据文件的读取(使用pandas package)
import numpy as np
import pandas as pd

# Parse the CSV file into a DataFrame.
pd_data = pd.read_csv(filepath_or_buffer='/home/test/iris_training.csv')
# Materialize the DataFrame's values as an independent NumPy array.
array_data = pd_data.to_numpy(copy=True)