Getting started with Keras: working with simple datasets

Keras:

Keras is a high-level neural network API. It is written in pure Python and runs on top of the TensorFlow, Theano, and CNTK backends. Keras was built to support fast experimentation.

The core data structure of Keras is the "model", which is a way of organizing network layers. The main model in Keras is the Sequential model: a stack of network layers arranged in order.
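As a minimal sketch (the layer sizes here are arbitrary and only for illustration), building a Sequential model just means stacking layers in order:

from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()
model.add(Dense(32, input_shape=(4,)))   # a small hidden layer; 32 units is an arbitrary choice
model.add(Activation('relu'))
model.add(Dense(3))                      # an output layer for 3 classes, again just for illustration
model.add(Activation('softmax'))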

Below I will use two classic datasets, iris and mnist, to show how Keras handles logistic (softmax) classification.


iris

# coding=utf-8

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed; use model_selection
from sklearn.linear_model import LogisticRegressionCV

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import np_utils

Two ways of splitting a dataset with sklearn are used here. The first is ShuffleSplit:

from sklearn.model_selection import ShuffleSplit

# X (features) and y (labels) are assumed to be defined already
rs = ShuffleSplit(n_splits=1, train_size=0.6, test_size=0.4, random_state=1)
# n_splits=1 means we only need one split; the train/test ratio is 6:4;
# random_state is the random seed that controls the shuffling

rs.get_n_splits(X)
X_trainset = None
X_testset = None
y_trainset = None
y_testset = None

for train_index, test_index in rs.split(X, y):
    X_trainset, X_testset = X[train_index], X[test_index]
    y_trainset, y_testset = y[train_index], y[test_index]

The second is train_test_split:

from sklearn.model_selection import train_test_split
train_X, test_X, train_y, test_y = train_test_split(X, y, train_size=0.1, random_state=0)
# random split; train_size sets the fraction of samples that go to the training set


All the y labels are converted to one-hot encoding:

def one_hot_encode_object_array(arr):
    uniques, ids = np.unique(arr, return_inverse=True)
    return np_utils.to_categorical(ids, len(uniques))

iris = sns.load_dataset("iris")
# load the iris dataset
X = iris.values[:, :4]
y = iris.values[:, 4]

# split the dataset
train_X, test_X, train_y, test_y = train_test_split(X, y, train_size=0.6, random_state=0)
print(train_X)
train_y_ohe = one_hot_encode_object_array(train_y)
test_y_ohe = one_hot_encode_object_array(test_y)
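As a quick sanity check (a sketch; the exact rows you see depend on the split), each species string maps to a row with a single 1 in one of three columns:

print(train_y[:3])      # species names, e.g. ['virginica' 'versicolor' 'setosa']
print(train_y_ohe[:3])  # the corresponding one-hot rows, each with a single 1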


Model:

# define the model
model = Sequential()
# input size, hidden layer size, activation function
model.add(Dense(200, input_shape=(4,)))
model.add(Activation('relu'))
# output layer size and activation; this makes a two-layer network
model.add(Dense(3))
model.add(Activation('softmax'))
# compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])
# train the model (nb_epoch was renamed to epochs in Keras 2)
model.fit(train_X, train_y_ohe, epochs=100, batch_size=1, verbose=1)

# evaluate the model
# predict_y = model.predict_proba(test_X)
# print(predict_y)
loss, accuracy = model.evaluate(test_X, test_y_ohe, verbose=0)
print("Accuracy = {:.2f}".format(accuracy))

mnist:

import numpy as np
import keras
np.random.seed(1337)
# fix the random seed for reproducibility
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils

batch_size = 128
nb_classes = 10
# number of classes: the digits 0-9
nb_epoch = 10
# number of training epochs (passes over the training set)
img_size = 28 * 28
# size of each input image, flattened to a vector

(X_train, y_train), (X_test, y_test) = mnist.load_data()
# load the dataset; it already comes split into training and test sets
X_train = X_train.reshape(X_train.shape[0], img_size).astype("float32") / 255
X_test = X_test.reshape(X_test.shape[0], img_size).astype("float32") / 255
# flatten each image into a vector and scale pixel values from 0-255 down to 0-1
print(X_train.shape)
print(X_test.shape)

# as before, convert y to one-hot encoding
Y_train = np_utils.to_categorical(y_train,nb_classes)
Y_test = np_utils.to_categorical(y_test,nb_classes)

model = Sequential([Dense(10, input_shape=(img_size,), activation="softmax")])
# a single Dense softmax layer: multinomial logistic regression on the raw pixels
# compile the model: RMSprop optimizer, categorical cross-entropy loss
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test))

Evaluate the model:

score = model.evaluate(X_test, Y_test, verbose=0)
print('accuracy: {}'.format(score[1]))
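As a final sketch (reusing the model, X_test, and y_test from above), you can compare a few predictions with the true labels:

preds = np.argmax(model.predict(X_test[:5]), axis=1)
print(preds)       # predicted digits for the first five test images
print(y_test[:5])  # the true digits for comparison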


My GitHub:

https://github.com/TFknight/PythonStudy/tree/master/keras-learn
