数据集的准备

数据集的准备


# coding:utf-8
# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import cv2
import os


def to_categorical(y, num_classes=None):
    """Convert a vector of integer class labels into a one-hot matrix.

    E.g. for use with categorical_crossentropy.

    # Arguments
        y: class vector to be converted into a matrix
            (integers from 0 to num_classes - 1). Values are cast to
            integers, so float labels such as 1.0 are accepted.
        num_classes: total number of classes. When omitted (or falsy)
            it is inferred as max(y) + 1.

    # Returns
        A float matrix of zeros and ones with shape
        y.shape + (num_classes,); a trailing axis of length 1 in y
        is dropped first, so (n, 1) input gives (n, num_classes).

    # Raises
        ValueError: if any label is negative, or if y is empty and
            num_classes is not given (np.max of an empty array).
        IndexError: if any label is >= num_classes.
    """
    y = np.array(y, dtype='int')
    input_shape = y.shape
    # Column-vector labels of shape (..., 1) are treated like shape (...,).
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    # Negative values would be accepted by NumPy indexing below and wrap
    # around to the last columns, silently producing wrong labels.
    if y.size and y.min() < 0:
        raise ValueError(
            'class labels must be non-negative, got %d' % y.min())
    if not num_classes:
        num_classes = np.max(y) + 1
    n = y.shape[0]
    categorical = np.zeros((n, num_classes))
    # Row k gets a single 1 in column y[k].
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes,)
    categorical = np.reshape(categorical, output_shape)
    return categorical

# Load every image under `path` into a flat feature matrix X and a one-hot
# label matrix Y, then split both into training and test sets.
# Each sub-directory of `path` is one class; its class index is the position
# of the directory name in the sorted `labels` list.
IMG_SIZE = 60   # images are resized to IMG_SIZE x IMG_SIZE before flattening

X = []     # flattened images, one entry per readable file
Y = []     # integer class index for the matching entry of X
path = "/home/tom/workspace/PycharmProjects/logic_regression/dataset"
# os.listdir() returns names in arbitrary order, so sort to keep the
# label -> index mapping stable between runs and machines; stray files
# sitting next to the class folders are ignored.
labels = sorted(
    name for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
)
for i, label in enumerate(labels):
    datas_path = os.path.join(path, label)
    # Sorted so the sample order (and therefore the seeded split below)
    # is reproducible.
    for data in sorted(os.listdir(datas_path)):
        img_path = os.path.join(datas_path, data)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread() returns None instead of raising when the file
            # cannot be read or decoded; cv2.resize() would crash on it.
            print("skipping unreadable image: " + img_path)
            continue
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        # Flatten the 3-D pixel array (60 * 60 * 3 = 10800 values).
        X.append(img.reshape(-1))
        Y.append(i)

# Convert the list to an array and scale pixel values to [0, 1].
X = np.array(X).astype(np.float32) / 255
# One column per class folder rather than a hard-coded 2.
Y = to_categorical(Y, len(labels))

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# repeatable.
X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)


# How to build one-hot labels by hand with NumPy

import numpy as np

# Build integer class labels for 9 samples (3 per class), then one-hot
# encode them.
a = np.zeros(3)     # three samples with label 0
b = np.ones(3)      # three samples with label 1
c = 2 * b           # three samples with label 2

# Join the per-class label vectors and cast them to integers so they can be
# used as column indices.
d = np.concatenate([a, b, c]).astype('int')

# Start from an all-zero (num_samples, 3) mask and set, in each row, the
# column named by that sample's label: the result is the one-hot label matrix.
rows = np.arange(d.size)
x = np.zeros((d.size, 3))
x[rows, d] = 1

x
Out[13]: 
array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])

CSV 数据文件的读取(使用 pandas 包)

import pandas as pd
import numpy as np

# Read the CSV file into a DataFrame (pandas treats the first row as the
# header by default), then copy its contents into a plain NumPy array.
csv_path = '/home/test/iris_training.csv'
pd_data = pd.read_csv(csv_path)
array_data = np.array(pd_data)


你可能感兴趣的:(Python)