The code covered in the book 《深度学习原理与TensorFlow实战》 (Deep Learning Principles and TensorFlow in Practice) comes mainly from:
A: the examples in TensorFlow / TensorFlow Models / TFLearn,
B: https://github.com/DeepVisionTeam/TensorFlowBook.git
The Titanic directory of https://github.com/DeepVisionTeam/TensorFlowBook.git provides the data needed for testing.
#《深度学习原理与TensorFlow实战》03 Hello TensorFlow
# Book source code: https://github.com/DeepVisionTeam/TensorFlowBook.git
# Video lectures: http://edu.csdn.net/course/detail/5222
# win10 Tensorflow-gpu1.2.0 python3.6.1
# CUDA v8.0 cudnn-8.0-windows10-x64-v5.1
# Local code path: D:\git\DeepLearning\TensorFlowBook\Titanic
# 01_tensorflow_basic.py
# 02_tensorflow_advanced.py
# 03_skflow.py (runs with errors; unresolved)
# 04_tflearn.py
# csv_to_tfrecords.py
# data_processing.py
# read_from_tfrecords.py
# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/csv_to_tfrecords.py
#! -*- coding:utf-8 -*-
import pandas as pd
import tensorflow as tf
# convert train.csv to train.tfrecords
def transform_to_tfrecord():
    data = pd.read_csv('data/train.csv')
    tfrecord_file = 'train.tfrecords'

    def int_feature(value):
        return tf.train.Feature(
            int64_list=tf.train.Int64List(value=[value]))

    def float_feature(value):
        return tf.train.Feature(
            float_list=tf.train.FloatList(value=[value]))

    writer = tf.python_io.TFRecordWriter(tfrecord_file)
    for i in range(len(data)):
        features = tf.train.Features(feature={
            'Age': float_feature(data['Age'][i]),
            'Survived': int_feature(data['Survived'][i]),
            'Pclass': int_feature(data['Pclass'][i]),
            'Parch': int_feature(data['Parch'][i]),
            'SibSp': int_feature(data['SibSp'][i]),
            'Sex': int_feature(1 if data['Sex'][i] == 'male' else 0),
            'Fare': float_feature(data['Fare'][i])
        })
        example = tf.train.Example(features=features)
        writer.write(example.SerializeToString())
    writer.close()


if __name__ == '__main__':
    transform_to_tfrecord()
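To sanity-check what transform_to_tfrecord() wrote, the file can be read back eagerly with tf.python_io.tf_record_iterator and parsed into tf.train.Example protos. A minimal sketch (the helper name inspect_tfrecords and the record limit are my own; the 'train.tfrecords' path matches the script above):

import tensorflow as tf

def inspect_tfrecords(tfrecord_file='train.tfrecords', max_records=3):
    """Print the first few Examples stored in a TFRecord file."""
    for n, record in enumerate(tf.python_io.tf_record_iterator(tfrecord_file)):
        if n >= max_records:
            break
        example = tf.train.Example.FromString(record)
        # each Example holds the feature map written by transform_to_tfrecord()
        print(example.features.feature['Survived'].int64_list.value,
              example.features.feature['Age'].float_list.value)

if __name__ == '__main__':
    inspect_tfrecords()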
# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/data_processing.py
import os
import re

import numpy as np
import pandas as pd
import tensorflow as tf
pjoin = os.path.join
DATA_DIR = pjoin(os.path.dirname(__file__), 'data')
train_data = pd.read_csv(pjoin(DATA_DIR, 'train.csv'))
test_data = pd.read_csv(pjoin(DATA_DIR, 'test.csv'))
# Translation:
# Don: an honorific title used in Spain, Portugal, Italy
# Dona: Feminine form for don
# Mme: Madame, Mrs
# Mlle: Mademoiselle, Miss
# Jonkheer (female equivalent: Jonkvrouw) is a Dutch honorific of nobility
HONORABLE_TITLES = ['sir', 'lady', 'don', 'dona', 'countess', 'jonkheer',
'major', 'col', 'dr', 'master', 'capt']
NORMAL_TITLES = ['mr', 'ms', 'mrs', 'miss', 'mme', 'mlle', 'rev']
TITLES = HONORABLE_TITLES + NORMAL_TITLES
def get_title(name):
    # raw string avoids the invalid '\.' escape warning
    title_search = re.search(r'([A-Za-z]+)\.', name)
    return title_search.group(1).lower()


def get_family(row):
    last_name = row['Name'].split(",")[0]
    if last_name:
        family_size = 1 + row['Parch'] + row['SibSp']
        if family_size > 3:
            return "{0}_{1}".format(last_name.lower(), family_size)
        else:
            return "nofamily"
    else:
        return "unknown"


def get_deck(cabin):
    if pd.isnull(cabin):
        return 'U'
    return cabin[:1]
class TitanicDigest(object):
    def __init__(self, dataset):
        self.count_by_sex = dataset.groupby('Sex')['PassengerId'].count()
        self.mean_age = dataset['Age'].mean()
        self.mean_age_by_sex = dataset.groupby("Sex")["Age"].mean()
        self.mean_fare_by_class = dataset.groupby("Pclass")["Fare"].mean()
        self.titles = TITLES
        self.families = dataset.apply(get_family, axis=1).unique().tolist()
        self.decks = dataset["Cabin"].apply(get_deck).unique().tolist()
        self.embarkments = dataset.Embarked.unique().tolist()
        # mode() returns a Series; take the scalar so it can be used as a fill value
        self.embark_mode = dataset.Embarked.dropna().mode().values[0]
def preprocess(data, digest):
    # convert ['male', 'female'] values of Sex to [1, 0]
    data['Sex'] = data['Sex'].apply(lambda s: 1 if s == 'male' else 0)

    # fill empty Age fields with the mean age
    data['Age'] = data['Age'].apply(
        lambda age: digest.mean_age if pd.isnull(age) else age)

    # is-child flag
    data['Child'] = data['Age'].apply(lambda age: 1 if age <= 15 else 0)

    # fill missing Fare with the mean fare of the passenger's class
    def get_fare_value(row):
        if pd.isnull(row['Fare']):
            return digest.mean_fare_by_class[row['Pclass']]
        else:
            return row['Fare']

    data['Fare'] = data.apply(get_fare_value, axis=1)

    # fill missing Embarked with the mode
    data['Embarked'] = data['Embarked'].apply(
        lambda e: digest.embark_mode if pd.isnull(e) else e)
    data["EmbarkedF"] = data["Embarked"].apply(digest.embarkments.index)

    # fill missing Cabin with the placeholder 'U0'
    data['Cabin'] = data['Cabin'].apply(lambda c: 'U0' if pd.isnull(c) else c)

    # Deck
    data["Deck"] = data["Cabin"].apply(lambda cabin: cabin[0])
    data["DeckF"] = data['Deck'].apply(digest.decks.index)

    data['Title'] = data['Name'].apply(get_title)
    data['TitleF'] = data['Title'].apply(digest.titles.index)
    data['Honor'] = data['Title'].apply(
        lambda title: int(title in HONORABLE_TITLES))
    data['Family'] = data.apply(get_family, axis=1)

    if 'Survived' in data.keys():
        data['Deceased'] = data['Survived'].apply(lambda s: int(not s))
    return data
digest = TitanicDigest(train_data)


def get_train_data():
    return preprocess(train_data, digest)


def get_test_data():
    return preprocess(test_data, digest)


def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def transform_to_tfrecord():
    data = pd.read_csv(pjoin(DATA_DIR, 'train.csv'))
    filepath = pjoin(DATA_DIR, 'data.tfrecords')
    writer = tf.python_io.TFRecordWriter(filepath)
    for i in range(len(data)):
        feature = {}
        for key in data.keys():
            value = data[key][i]
            # pandas hands back NumPy scalars, so check the NumPy
            # abstract types as well as the plain Python ones
            if isinstance(value, (int, np.integer)):
                value = tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[value]))
            elif isinstance(value, (float, np.floating)):
                value = tf.train.Feature(
                    float_list=tf.train.FloatList(value=[value]))
            elif isinstance(value, str):
                value = tf.train.Feature(
                    bytes_list=tf.train.BytesList(
                        value=[value.encode(encoding="utf-8")]))
            feature[key] = value
        example = tf.train.Example(
            features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())
    writer.close()


if __name__ == '__main__':
    transform_to_tfrecord()
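As a quick smoke test of data_processing.py, importing the module and printing the engineered columns and digest statistics should confirm the preprocessing ran. A sketch, assuming train.csv and test.csv sit in the module's data/ directory:

from data_processing import digest, get_train_data

train = get_train_data()
# engineered columns added by preprocess()
print(train[['Sex', 'Child', 'EmbarkedF', 'DeckF', 'TitleF', 'Honor']].head())
# aggregate statistics collected by TitanicDigest
print('mean age: %.2f' % digest.mean_age)
print(digest.mean_fare_by_class)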
# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/read_from_tfrecords.py
#!/usr/bin/env python
# coding=utf-8
import tensorflow as tf
def read_and_decode(train_files, num_threads=2, num_epochs=100,
                    batch_size=10, min_after_dequeue=10):
    # read data from train_files in TFRecord format
    reader = tf.TFRecordReader()
    filename_queue = tf.train.string_input_producer(
        train_files,
        num_epochs=num_epochs)
    _, serialized_example = reader.read(filename_queue)
    featuresdict = tf.parse_single_example(
        serialized_example,
        features={
            'Survived': tf.FixedLenFeature([], tf.int64),
            'Pclass': tf.FixedLenFeature([], tf.int64),
            'Parch': tf.FixedLenFeature([], tf.int64),
            'SibSp': tf.FixedLenFeature([], tf.int64),
            'Sex': tf.FixedLenFeature([], tf.int64),
            'Age': tf.FixedLenFeature([], tf.float32),
            'Fare': tf.FixedLenFeature([], tf.float32)})

    # decode all features to a common float32 format
    labels = featuresdict.pop('Survived')
    features = [tf.cast(value, tf.float32)
                for value in featuresdict.values()]

    # batch the data with shuffling and return
    features, labels = tf.train.shuffle_batch(
        [features, labels],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=min_after_dequeue + 3 * batch_size,
        min_after_dequeue=min_after_dequeue)
    return features, labels
def train_with_queuerunner():
    x, y = read_and_decode(['train.tfrecords'])

    with tf.Session() as sess:
        tf.group(tf.global_variables_initializer(),
                 tf.local_variables_initializer()).run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            step = 0
            while not coord.should_stop():
                # Run training steps or whatever
                features, labels = sess.run([x, y])
                if step % 100 == 0:
                    print('step %d:' % step, labels)
                step += 1
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)


if __name__ == '__main__':
    train_with_queuerunner()
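One subtlety in read_and_decode(): the column order of the feature vector follows featuresdict.values(). Python 3.6 dicts happen to preserve insertion order, but if a guaranteed column order matters downstream, an explicit list is safer. A hedged variant of the decoding step (FEATURE_ORDER and decode_in_fixed_order are illustrative names, not part of the original script):

import tensorflow as tf

# explicit column order instead of relying on dict iteration order
FEATURE_ORDER = ['Pclass', 'Parch', 'SibSp', 'Sex', 'Age', 'Fare']

def decode_in_fixed_order(featuresdict):
    """Cast features to float32 in a fixed, reproducible column order."""
    labels = featuresdict.pop('Survived')
    features = [tf.cast(featuresdict[key], tf.float32)
                for key in FEATURE_ORDER]
    return features, labels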
# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/01_tensorflow_basic.py
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
################################
# Preparing Data
################################
# read data from file
data = pd.read_csv('data/train.csv')
# fill nan values with 0
data = data.fillna(0)
# convert ['male', 'female'] values of Sex to [1, 0]
data['Sex'] = data['Sex'].apply(lambda s: 1 if s == 'male' else 0)
# 'Survived' is the label of one class,
# add 'Deceased' as the other class
data['Deceased'] = data['Survived'].apply(lambda s: 1 - s)
# select features and labels for training
dataset_X = data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']].as_matrix()
dataset_Y = data[['Deceased', 'Survived']].as_matrix()
# split training data and validation set data
X_train, X_val, y_train, y_val = train_test_split(dataset_X, dataset_Y,
                                                  test_size=0.2,
                                                  random_state=42)
################################
# Constructing Dataflow Graph
################################
# create symbolic variables
X = tf.placeholder(tf.float32, shape=[None, 6])
y = tf.placeholder(tf.float32, shape=[None, 2])
# weights and bias are the variables to be trained
weights = tf.Variable(tf.random_normal([6, 2]), name='weights')
bias = tf.Variable(tf.zeros([2]), name='bias')
y_pred = tf.nn.softmax(tf.matmul(X, weights) + bias)
# Minimise cost using cross entropy
# NOTE: add an epsilon (1e-10) when calculating log(y_pred),
# otherwise the result will be -inf
cross_entropy = - tf.reduce_sum(y * tf.log(y_pred + 1e-10),
                                reduction_indices=1)
cost = tf.reduce_mean(cross_entropy)
# use gradient descent optimizer to minimize cost
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
# calculate accuracy
correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1))
acc_op = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
################################
# Training and Evaluating the model
################################
# use session to run the calculation
with tf.Session() as sess:
    # variables have to be initialized at the first place
    tf.global_variables_initializer().run()

    # training loop
    for epoch in range(10):
        total_loss = 0.
        for i in range(len(X_train)):
            # prepare feed data and run
            feed_dict = {X: [X_train[i]], y: [y_train[i]]}
            _, loss = sess.run([train_op, cost], feed_dict=feed_dict)
            total_loss += loss
        # display loss per epoch
        print('Epoch: %04d, total loss=%.9f' % (epoch + 1, total_loss))

    # Accuracy calculated by TensorFlow
    accuracy = sess.run(acc_op, feed_dict={X: X_val, y: y_val})
    print("Accuracy on validation set: %.9f" % accuracy)

    # Accuracy calculated by NumPy
    pred = sess.run(y_pred, feed_dict={X: X_val})
    correct = np.equal(np.argmax(pred, 1), np.argmax(y_val, 1))
    numpy_accuracy = np.mean(correct.astype(np.float32))
    print("Accuracy on validation set (numpy): %.9f" % numpy_accuracy)

    # predict on test data
    testdata = pd.read_csv('data/test.csv')
    testdata = testdata.fillna(0)
    # convert ['male', 'female'] values of Sex to [1, 0]
    testdata['Sex'] = testdata['Sex'].apply(lambda s: 1 if s == 'male' else 0)
    X_test = testdata[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']]
    predictions = np.argmax(sess.run(y_pred, feed_dict={X: X_test}), 1)
    submission = pd.DataFrame({
        "PassengerId": testdata["PassengerId"],
        "Survived": predictions
    })
    submission.to_csv("titanic-submission.csv", index=False)
'''
Epoch: 0001, total loss=1332.714233560
Epoch: 0002, total loss=1080.481877883
Epoch: 0003, total loss=1146.676006738
...
Epoch: 0009, total loss=1100.583975340
Epoch: 0010, total loss=1091.225955000
Accuracy on validation set: 0.586592197
Accuracy on validation set (numpy): 0.586592197
'''
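A side note on the epsilon hack: tf.log(y_pred + 1e-10) avoids -inf but slightly biases the loss. TensorFlow 1.x also provides a fused op that computes softmax and cross entropy together in a numerically stable way. A sketch of how the cost above could be rewritten (keeping logits, the pre-softmax scores, as a separate tensor):

# logits are the raw, pre-softmax scores
logits = tf.matmul(X, weights) + bias
y_pred = tf.nn.softmax(logits)  # kept for prediction / argmax

# fused softmax + cross entropy: numerically stable, no epsilon needed
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
cost = tf.reduce_mean(cross_entropy)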
# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/02_tensorflow_advanced.py
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
################################
# Preparing Data
################################
# read data from file
data = pd.read_csv('data/train.csv')
# fill nan values with 0
data = data.fillna(0)
# convert ['male', 'female'] values of Sex to [1, 0]
data['Sex'] = data['Sex'].apply(lambda s: 1 if s == 'male' else 0)
# 'Survived' is the label of one class,
# add 'Deceased' as the other class
data['Deceased'] = data['Survived'].apply(lambda s: 1 - s)
# select features and labels for training
dataset_X = data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']].as_matrix()
dataset_Y = data[['Deceased', 'Survived']].as_matrix()
# split training data and validation set data
X_train, X_val, y_train, y_val = train_test_split(dataset_X, dataset_Y,
                                                  test_size=0.2,
                                                  random_state=42)
################################
# Constructing Dataflow Graph
################################
# arguments that can be set in command line
tf.app.flags.DEFINE_integer('epochs', 10, 'Training epochs')
tf.app.flags.DEFINE_integer('batch_size', 10, 'size of mini-batch')
FLAGS = tf.app.flags.FLAGS
with tf.name_scope('input'):
    # create symbolic variables
    X = tf.placeholder(tf.float32, shape=[None, 6])
    y_true = tf.placeholder(tf.float32, shape=[None, 2])

with tf.name_scope('classifier'):
    # weights and bias are the variables to be trained
    weights = tf.Variable(tf.random_normal([6, 2]))
    bias = tf.Variable(tf.zeros([2]))
    y_pred = tf.nn.softmax(tf.matmul(X, weights) + bias)

    # add histogram summaries for weights, view on tensorboard
    tf.summary.histogram('weights', weights)
    tf.summary.histogram('bias', bias)

# Minimise cost using cross entropy
# NOTE: add an epsilon (1e-10) when calculating log(y_pred),
# otherwise the result will be -inf
with tf.name_scope('cost'):
    cross_entropy = - tf.reduce_sum(y_true * tf.log(y_pred + 1e-10),
                                    reduction_indices=1)
    cost = tf.reduce_mean(cross_entropy)
    tf.summary.scalar('loss', cost)

# use gradient descent optimizer to minimize cost
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)

with tf.name_scope('accuracy'):
    correct_pred = tf.equal(tf.argmax(y_true, 1), tf.argmax(y_pred, 1))
    acc_op = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Add scalar summary for accuracy
    tf.summary.scalar('accuracy', acc_op)
global_step = tf.Variable(0, name='global_step', trainable=False)
# use saver to save and restore model
saver = tf.train.Saver()
# this variable won't be stored, since it is declared after tf.train.Saver()
non_storable_variable = tf.Variable(777)
ckpt_dir = './ckpt_dir'
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)
################################
# Training the model
################################
# use session to run the calculation
with tf.Session() as sess:
    # create a log writer. run 'tensorboard --logdir=./logs'
    writer = tf.summary.FileWriter('./logs', sess.graph)
    merged = tf.summary.merge_all()

    # variables have to be initialized at the first place
    tf.global_variables_initializer().run()

    # restore variables from checkpoint if it exists
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print('Restoring from checkpoint: %s' % ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

    start = global_step.eval()
    # training loop
    for epoch in range(start, start + FLAGS.epochs):
        total_loss = 0.
        for i in range(0, len(X_train), FLAGS.batch_size):
            # train with mini-batch
            feed_dict = {
                X: X_train[i: i + FLAGS.batch_size],
                y_true: y_train[i: i + FLAGS.batch_size]
            }
            _, loss = sess.run([train_op, cost], feed_dict=feed_dict)
            total_loss += loss
        # display loss per epoch
        print('Epoch: %04d, loss=%.9f' % (epoch + 1, total_loss))

        summary, accuracy = sess.run([merged, acc_op],
                                     feed_dict={X: X_val, y_true: y_val})
        writer.add_summary(summary, epoch)  # Write summary
        print('Accuracy on validation set: %.9f' % accuracy)

        # set and update (eval) global_step with epoch
        global_step.assign(epoch).eval()
        saver.save(sess, ckpt_dir + '/logistic.ckpt',
                   global_step=global_step)
    print('Training complete!')
################################
# Evaluating on the test set
################################
# restore variables and run prediction in another session
with tf.Session() as sess:
    # restore variables from checkpoint if it exists
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print('Restoring from checkpoint: %s' % ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)

    # predict on test data
    testdata = pd.read_csv('data/test.csv')
    testdata = testdata.fillna(0)
    # convert ['male', 'female'] values of Sex to [1, 0]
    testdata['Sex'] = testdata['Sex'].apply(lambda s: 1 if s == 'male' else 0)
    X_test = testdata[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']]

    # predict on test set
    predictions = np.argmax(sess.run(y_pred, feed_dict={X: X_test}), 1)
    submission = pd.DataFrame({
        "PassengerId": testdata["PassengerId"],
        "Survived": predictions
    })
    submission.to_csv("titanic-submission.csv", index=False)
'''
Epoch: 0001, loss=615.245625108
Accuracy on validation set: 0.586592138
...
Epoch: 0009, loss=610.188485358
Accuracy on validation set: 0.586592138
Epoch: 0010, loss=609.152617797
Accuracy on validation set: 0.586592197
Training complete!
Restoring from checkpoint: ./ckpt_dir\logistic.ckpt-9
# Files generated:
# .\titanic-submission.csv
# .\logs\events.out.tfevents.1498818459.LFY_P50
# .\ckpt_dir\logistic.ckpt*
# .\ckpt_dir\checkpoint
'''
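A small aside on the restore logic: besides tf.train.get_checkpoint_state, TF 1.x offers tf.train.latest_checkpoint, which returns the newest checkpoint path (or None) directly. An equivalent restore sketch, assuming the same ckpt_dir and saver as above:

# inside a `with tf.Session() as sess:` block, after initialization
latest = tf.train.latest_checkpoint(ckpt_dir)
if latest is not None:
    print('Restoring from checkpoint: %s' % latest)
    saver.restore(sess, latest)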
The following script did not run successfully; see the warnings and the error at the end.
# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/03_skflow.py
import pandas as pd
import tensorflow.contrib.learn as skflow
from sklearn import metrics
from sklearn.model_selection import train_test_split
from data_processing import get_test_data, get_train_data
train_data = get_train_data()
X = train_data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Child',
                'EmbarkedF', 'DeckF', 'TitleF', 'Honor']].as_matrix()
Y = train_data['Survived']
# split training data and validation set data
X_train, X_val, Y_train, Y_val = (
    train_test_split(X, Y, test_size=0.1, random_state=42))
# skflow classifier
feature_cols = skflow.infer_real_valued_columns_from_input(X_train)
classifier = skflow.LinearClassifier(feature_columns=feature_cols, n_classes=2)
classifier.fit(X_train, Y_train, steps=200)
score = metrics.accuracy_score(Y_val, classifier.predict(X_val))
print("Accuracy: %f" % score)
# predict on test dataset
test_data = get_test_data()
X = test_data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Child',
               'EmbarkedF', 'DeckF', 'TitleF', 'Honor']].as_matrix()
predictions = classifier.predict(X)
submission = pd.DataFrame({
    "PassengerId": test_data["PassengerId"],
    "Survived": predictions
})
submission.to_csv("titanic-submission.csv", index=False)
'''
WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.
WARNING:tensorflow:Using temporary folder as model directory: C:\Users\soft\AppData\Local\Temp\tmpc5meq6vi
WARNING:tensorflow:From D:/git/DeepLearning/TensorFlowBook/Titanic/03_skflow.py:20: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:From D:/git/DeepLearning/TensorFlowBook/Titanic/03_skflow.py:20: calling BaseEstimator.fit (from tensorflow.contrib.learn.python.learn.estimators.estimator) with y is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.
WARNING:tensorflow:From C:\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\head.py:625: scalar_summary (from tensorflow.python.ops.logging_ops) is deprecated and will be removed after 2016-11-30.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
WARNING:tensorflow:Casting labels to bool.
WARNING:tensorflow:Casting labels to bool.
WARNING:tensorflow:From C:\Python36\lib\site-packages\tensorflow\python\util\deprecation.py:347: calling LinearClassifier.predict (from tensorflow.contrib.learn.python.learn.estimators.linear) with outputs=None is deprecated and will be removed after 2017-03-01.
Instructions for updating:
Please switch to predict_classes, or set `outputs` argument.
WARNING:tensorflow:From C:\Python36\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\linear.py:565: calling BaseEstimator.predict (from tensorflow.contrib.learn.python.learn.estimators.estimator) with x is deprecated and will be removed after 2016-12-01.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
est = Estimator(...) -> est = SKCompat(Estimator(...))
WARNING:tensorflow:float64 is not supported by many models, consider casting to float32.
2017-06-30 17:57:09.918373: I d:\git\deeplearning\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1030] Creating TensorFlow device (/gpu:0) -> (device: 0, name: Quadro M2000M, pci bus id: 0000:01:00.0)
Traceback (most recent call last):
  File "D:/git/DeepLearning/TensorFlowBook/Titanic/03_skflow.py", line 21, in <module>
    score = metrics.accuracy_score(Y_val, classifier.predict(X_val))
  File "C:\Python36\lib\site-packages\sklearn\metrics\classification.py", line 172, in accuracy_score
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
  File "C:\Python36\lib\site-packages\sklearn\metrics\classification.py", line 72, in _check_targets
    check_consistent_length(y_true, y_pred)
  File "C:\Python36\lib\site-packages\sklearn\utils\validation.py", line 177, in check_consistent_length
    lengths = [_num_samples(X) for X in arrays if X is not None]
  File "C:\Python36\lib\site-packages\sklearn\utils\validation.py", line 177, in <listcomp>
    lengths = [_num_samples(X) for X in arrays if X is not None]
  File "C:\Python36\lib\site-packages\sklearn\utils\validation.py", line 122, in _num_samples
    type(x))
TypeError: Expected sequence or array-like, got <class 'generator'>
'''
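The traceback pinpoints the unresolved error noted at the top: in this tf.contrib.learn version, LinearClassifier.predict returns a generator, which sklearn's accuracy_score cannot take. A plausible fix (untested here) is to materialize the generator before scoring:

import numpy as np

# predict() yields one class per example; collect it into an array first
val_predictions = np.array(list(classifier.predict(X_val)))
score = metrics.accuracy_score(Y_val, val_predictions)
print("Accuracy: %f" % score)

# the same applies to the test-set predictions written to the submission
predictions = np.array(list(classifier.predict(X)))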
# https://github.com/DeepVisionTeam/TensorFlowBook/blob/master/Titanic/04_tflearn.py
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import tflearn
from data_processing import get_test_data
from data_processing import get_train_data
train_data = get_train_data()
X = train_data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Child',
                'EmbarkedF', 'DeckF', 'TitleF', 'Honor']].as_matrix()
Y = train_data[['Deceased', 'Survived']].as_matrix()
# arguments that can be set in command line
tf.app.flags.DEFINE_integer('epochs', 10, 'Training epochs')
FLAGS = tf.app.flags.FLAGS
ckpt_dir = './ckpt_dir'
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)

# define the model
n_features = X.shape[1]
input = tflearn.input_data([None, n_features])
network = tflearn.layers.fully_connected(input, 100, activation='relu')
network = tflearn.layers.fully_connected(network, 100, activation='relu')
y_pred = tflearn.layers.fully_connected(network, 2, activation='softmax')
net = tflearn.regression(y_pred)
model = tflearn.DNN(net)

# restore the model if there is a checkpoint
if os.path.isfile(os.path.join(ckpt_dir, 'model.ckpt')):
    model.load(os.path.join(ckpt_dir, 'model.ckpt'))
# train model
model.fit(X, Y, validation_set=0.1, n_epoch=FLAGS.epochs)
# save the trained model
model.save(os.path.join(ckpt_dir, 'model.ckpt'))
metric = model.evaluate(X, Y)
print('Accuracy on train set: %.9f' % metric[0])
# predict on test dataset
test_data = get_test_data()
X = test_data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Child',
               'EmbarkedF', 'DeckF', 'TitleF', 'Honor']].as_matrix()
predictions = np.argmax(model.predict(X), 1)
submission = pd.DataFrame({
    "PassengerId": test_data["PassengerId"],
    "Survived": predictions
})
submission.to_csv("titanic-submission.csv", index=False)
'''
---------------------------------
Run id: 0P7SI9
Log directory: /tmp/tflearn_logs/
---------------------------------
Training samples: 801
Validation samples: 90
--
Training Step: 1 | time: 0.295s
| Adam | epoch: 001 | loss: 0.00000 -- iter: 064/801
Training Step: 2 | total loss: 0.62647 | time: 0.299s
| Adam | epoch: 001 | loss: 0.62647 -- iter: 128/801
...
Training Step: 129 | total loss: 0.53789 | time: 0.052s
| Adam | epoch: 010 | loss: 0.53789 -- iter: 768/801
Training Step: 130 | total loss: 0.53268 | time: 1.059s
| Adam | epoch: 010 | loss: 0.53268 | val_loss: 0.49138 -- iter: 801/801
--
Accuracy on train set: 0.790123456
'''
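Once model.save() has written ckpt_dir/model.ckpt, the trained tflearn model can be reloaded in a fresh process by rebuilding the identical graph and calling model.load. A sketch under that assumption (the 11-unit input matches the feature list above):

import numpy as np
import tflearn

# rebuild the identical graph before loading the weights
input = tflearn.input_data([None, 11])  # 11 features, as selected above
network = tflearn.layers.fully_connected(input, 100, activation='relu')
network = tflearn.layers.fully_connected(network, 100, activation='relu')
y_pred = tflearn.layers.fully_connected(network, 2, activation='softmax')
model = tflearn.DNN(tflearn.regression(y_pred))

model.load('./ckpt_dir/model.ckpt')
# X: a preprocessed feature matrix with the same 11 columns, e.g. from get_test_data()
# predictions = np.argmax(model.predict(X), 1)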