# Predicting Titanic survival with TensorFlow (TensorFlow实现预测Titanic)

import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np

# Load the training CSV (presumably the Kaggle Titanic train.csv, judging by
# the path and the column names used below).
data = pd.read_csv('F:\\Kaggle\\train.csv')

# Show a summary of the data. DataFrame.info() prints the summary itself and
# returns None, so it is called directly; wrapping it in print() would emit
# an extra "None" line.
data.info()

# Use a subset of the columns as classification features and fill every
# missing value with 0.
# Encode the 'Sex' string column as a number: 1 for 'male', 0 otherwise.
data['Sex'] = data['Sex'].apply(lambda s: 1 if s == 'male' else 0)
# Replace all missing values in the frame with 0.
data = data.fillna(0)
dataset_X = data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']]
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values yields the same NumPy array on both old and current pandas.
dataset_X = dataset_X.values

# There are two classes, survived and deceased; the 'Survived' column is the
# label for one of them.
# Add a 'Deceased' column as the label of the second class, defined as the
# logical negation of 'Survived', giving a two-column one-hot target.
data['Deceased'] = data['Survived'].apply(lambda s: int(not s))
dataset_Y = data[['Deceased', 'Survived']]
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values yields the same NumPy array on both old and current pandas.
dataset_Y = dataset_Y.values

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    dataset_X,
    dataset_Y,
    test_size=0.2,
    random_state=42,
)
# Build the computation graph.
# Declare the input placeholders.
# The first element of `shape` is None so that any number of records can be
# fed at once; 6 is the number of feature columns, 2 the number of classes.
X = tf.placeholder(tf.float32, shape=[None, 6])
y = tf.placeholder(tf.float32, shape=[None, 2])
# Declare the trainable variables: randomly initialized weights, zero bias.
W = tf.Variable(tf.random_normal([6, 2]), name='weights')
b = tf.Variable(tf.zeros([2]), name='bias')


# Logistic (softmax) regression: class probabilities for each input row.
y_pred = tf.nn.softmax(tf.add(tf.matmul(X, W), b))
# Cost function: cross-entropy between the one-hot labels and predictions.
# The sum runs over the class axis only (axis=1), producing one value per
# record; without an axis the whole batch would collapse to a single scalar
# and the reduce_mean below would do nothing. The 1e-10 term keeps log()
# away from log(0) when a predicted probability underflows to zero.
cross_entropy = -tf.reduce_sum(y * tf.log(y_pred + 1e-10), axis=1)
# Average the per-record cross-entropy over the batch.
cost = tf.reduce_mean(cross_entropy)

# Optimizer: plain gradient descent, where 0.001 is the learning rate.
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)


# Run the training loop, then measure accuracy on the held-out split.
with tf.Session() as sess:
    # Initialize all variables; this must run before any op that reads them.
    sess.run(tf.global_variables_initializer())
    # Training iterations: 10 epochs, one gradient step per training record.
    for epoch in range(10):
        total_loss = 0
        for features, label in zip(X_train, y_train):
            # Wrap each record in a list so it is fed as a batch of size 1.
            feed = {X: [features], y: [label]}
            # session.run triggers execution of the training op and the cost.
            _, loss = sess.run([train_op, cost], feed_dict=feed)
            total_loss += loss
        print('Epoch: %04d, total loss=%.9f' % (epoch + 1, total_loss))
    print('Training complete!')

    # Evaluate on the held-out records (X_test / y_test), which the model
    # never saw during training. Scoring the training data here would
    # report training accuracy under a "validation" label.
    pred = sess.run(y_pred, feed_dict={X: X_test})
    # A prediction is correct when the most probable class matches the
    # position of the 1 in the one-hot label.
    correct = np.equal(np.argmax(pred, 1), np.argmax(y_test, 1))
    accuracy = np.mean(correct.astype(np.float32))
    print('Accuracy on validation set: %.9f' % accuracy)

# 你可能感兴趣的:(TensorFlow)