TensorFlow2.0 Keras泰坦尼克数据集预测

import urllib.request
import os

# Source spreadsheet and the local path it is cached at.
url = 'http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls'
filepath = './data/titanic3.xls'
# Download only when no cached copy exists yet.
if not os.path.isfile(filepath):
    # urlretrieve opens the target path for writing and does not create
    # missing parent directories, so make sure ./data exists first.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    result = urllib.request.urlretrieve(url, filepath)
    print('downloaded:', result)
downloaded: ('./data/titanic3.xls', )
import numpy as np
import pandas as pd

# Columns to keep from the spreadsheet; 'survived' is the prediction target.
cols = [
    'survived',
    'name',
    'pclass',
    'sex',
    'age',
    'sibsp',
    'parch',
    'fare',
    'embarked',
]
# Read the spreadsheet and narrow it to the selected columns in one step.
all_df = pd.read_excel('./data/titanic3.xls')[cols]
# Preview the first two rows.
all_df.head(2)
survived name pclass sex age sibsp parch fare embarked
0 1 Allen, Miss. Elisabeth Walton 1 female 29.0000 0 0 211.3375 S
1 1 Allison, Master. Hudson Trevor 1 male 0.9167 1 2 151.5500 S
# Working copy without the free-text 'name' column.
df = all_df.drop(columns=['name'])
# Number of missing values in each column of the full frame.
all_df.isna().sum()
survived      0
name          0
pclass        0
sex           0
age         263
sibsp         0
parch         0
fare          1
embarked      2
dtype: int64
# Column means used to fill the missing numeric values.
age_mean = df['age'].mean()
fare_mean = df['fare'].mean()
df['age'] = df['age'].fillna(age_mean)
df['fare'] = df['fare'].fillna(fare_mean)
# Encode sex as an integer: female -> 0, male -> 1.
df['sex'] = df['sex'].map({'female': 0, 'male': 1}).astype(int)
# One-hot encode the port of embarkation into embarked_* columns.
x_Onehot_df = pd.get_dummies(df, columns=['embarked'])
# Preview the first two encoded rows.
x_Onehot_df.head(2)
survived pclass sex age sibsp parch fare embarked_C embarked_Q embarked_S
0 1 1 0 29.0000 0 0 211.3375 0 0 1
1 1 1 1 0.9167 1 2 151.5500 0 0 1
# Convert the encoded frame to a plain NumPy array (rows x columns).
ndarray = x_Onehot_df.values
# Display the array dimensions.
ndarray.shape
(1309, 10)
# Preview the first two rows of the array.
ndarray[:2]
array([[  1.    ,   1.    ,   0.    ,  29.    ,   0.    ,   0.    ,
        211.3375,   0.    ,   0.    ,   1.    ],
       [  1.    ,   1.    ,   1.    ,   0.9167,   1.    ,   2.    ,
        151.55  ,   0.    ,   0.    ,   1.    ]])
# Column 0 is 'survived' (the target); every remaining column is a feature.
Label = ndarray[:,0]
Features = ndarray[:, 1:]
# Preview the first two labels.
Label[:2]
array([1., 1.])
# Preview the first two (unscaled) feature rows.
Features[:2]
array([[  1.    ,   0.    ,  29.    ,   0.    ,   0.    , 211.3375,
          0.    ,   0.    ,   1.    ],
       [  1.    ,   1.    ,   0.9167,   1.    ,   2.    , 151.55  ,
          0.    ,   0.    ,   1.    ]])
from sklearn import preprocessing
# Fit a min-max scaler on the feature matrix, then rescale every feature
# column into the [0, 1] range with the fitted scaler.
minmax_Scale = preprocessing.MinMaxScaler(feature_range=(0, 1)).fit(Features)
scaledFeatures = minmax_Scale.transform(Features)
# Preview the first two scaled rows.
scaledFeatures[:2]
array([[0.        , 0.        , 0.36116884, 0.        , 0.        ,
        0.41250333, 0.        , 0.        , 1.        ],
       [0.        , 1.        , 0.00939458, 0.125     , 0.22222222,
        0.2958059 , 0.        , 0.        , 1.        ]])
# Boolean mask with one uniform random draw per row; rows whose draw is
# below 0.8 go to the training set, the rest to the test set.
msk = np.random.rand(all_df.shape[0]) < 0.8
train_df, test_df = all_df[msk], all_df[~msk]
print('total:', len(all_df), 'train:', len(train_df), 'test:', len(test_df))
total: 1309 train: 1071 test: 238
def PreprocessData(raw_df):
    """Turn a raw passenger frame into scaled features and labels.

    The frame is processed in these steps: drop ``name``, fill missing
    ``age`` and ``fare`` values with the respective column mean of
    ``raw_df``, encode ``sex`` as 0 (female) / 1 (male), one-hot encode
    ``embarked`` and min-max scale every feature column into [0, 1].

    Args:
        raw_df: DataFrame containing the columns ``name``, ``sex``, ``age``,
            ``fare`` and ``embarked``. After ``name`` is dropped, the first
            remaining column is taken as the label and all other columns
            as features.

    Returns:
        A tuple ``(scaledFeatures, Label)``: the scaled feature matrix and
        the 1-D label array, both as float NumPy arrays.
    """
    df = raw_df.drop(['name'], axis=1)
    age_mean = df['age'].mean()
    df['age'] = df['age'].fillna(age_mean)
    fare_mean = df['fare'].mean()
    # Missing fares must be filled with the mean fare, not the mean age.
    df['fare'] = df['fare'].fillna(fare_mean)
    df['sex'] = df['sex'].map({'female': 0, 'male': 1}).astype(int)
    x_Onehot_df = pd.get_dummies(data=df, columns=['embarked'])

    # Cast explicitly to float: pandas >= 2.0 emits boolean dummy columns,
    # which would otherwise turn .values into an object-dtype array.
    ndarray = x_Onehot_df.values.astype('float64')
    Features = ndarray[:, 1:]
    Label = ndarray[:, 0]

    # NOTE(review): the scaler is fitted on whatever frame is passed in, so
    # separate calls (e.g. train vs. test) use different scaling ranges.
    minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
    scaledFeatures = minmax_scale.fit_transform(Features)

    return scaledFeatures, Label
# Preprocess the two splits independently.
# NOTE(review): each call fits its own mean imputation and min-max scaler,
# so the test split is not scaled with the training statistics.
train_Features, train_Label = PreprocessData(train_df)
test_Features, test_Label = PreprocessData(test_df)
# Preview the first two scaled training rows.
train_Features[:2]
array([[0.        , 0.        , 0.0229641 , 0.125     , 0.22222222,
        0.2958059 , 0.        , 0.        , 1.        ],
       [0.        , 1.        , 0.37369494, 0.125     , 0.22222222,
        0.2958059 , 0.        , 0.        , 1.        ]])
# Preview the first two test labels.
test_Label[:2]
array([1., 1.])
# Build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
# Fully connected binary classifier: 9 inputs -> 40 (relu) -> 30 (relu)
# -> 1 (sigmoid), declared as a layer list instead of repeated add() calls.
model = Sequential([
    Dense(40, input_dim=9, kernel_initializer='uniform', activation='relu'),
    Dense(30, kernel_initializer='uniform', activation='relu'),
    Dense(1, kernel_initializer='uniform', activation='sigmoid'),
])
# Binary cross-entropy loss with the Adam optimizer; report accuracy.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
W0819 11:23:43.940761  6100 deprecation_wrapper.py:119] From E:\Anaconda3\envs\ml\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0819 11:23:43.970712  6100 deprecation_wrapper.py:119] From E:\Anaconda3\envs\ml\lib\site-packages\keras\backend\tensorflow_backend.py:3376: The name tf.log is deprecated. Please use tf.math.log instead.

W0819 11:23:43.976665  6100 deprecation.py:323] From E:\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\ops\nn_impl.py:180: add_dispatch_support..wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
# Train for 30 epochs in mini-batches of 30 samples, holding out 10% of the
# training data for validation; verbose=2 prints one summary line per epoch.
train_history = model.fit(x=train_Features,
                          y=train_Label,
                          validation_split=0.1,
                          batch_size=30,
                          epochs=30,
                          verbose=2)
Train on 963 samples, validate on 108 samples
Epoch 1/30
 - 0s - loss: 0.6645 - acc: 0.6023 - val_loss: 0.5840 - val_acc: 0.7685
Epoch 2/30
 - 0s - loss: 0.6062 - acc: 0.6594 - val_loss: 0.4936 - val_acc: 0.7870
Epoch 3/30
 - 0s - loss: 0.5513 - acc: 0.7487 - val_loss: 0.4564 - val_acc: 0.7870
Epoch 4/30
 - 0s - loss: 0.5151 - acc: 0.7747 - val_loss: 0.4487 - val_acc: 0.8056
Epoch 5/30
 - 0s - loss: 0.4968 - acc: 0.7757 - val_loss: 0.4538 - val_acc: 0.8056
Epoch 6/30
 - 0s - loss: 0.4882 - acc: 0.7736 - val_loss: 0.4354 - val_acc: 0.8056
Epoch 7/30
 - 0s - loss: 0.4839 - acc: 0.7695 - val_loss: 0.4277 - val_acc: 0.8148
Epoch 8/30
 - 0s - loss: 0.4818 - acc: 0.7788 - val_loss: 0.4254 - val_acc: 0.8148
Epoch 9/30
 - 0s - loss: 0.4796 - acc: 0.7840 - val_loss: 0.4231 - val_acc: 0.8333
Epoch 10/30
 - 0s - loss: 0.4766 - acc: 0.7819 - val_loss: 0.4247 - val_acc: 0.8148
Epoch 11/30
 - 0s - loss: 0.4733 - acc: 0.7830 - val_loss: 0.4240 - val_acc: 0.8148
Epoch 12/30
 - 0s - loss: 0.4714 - acc: 0.7840 - val_loss: 0.4174 - val_acc: 0.8333
Epoch 13/30
 - 0s - loss: 0.4684 - acc: 0.7871 - val_loss: 0.4181 - val_acc: 0.8426
Epoch 14/30
 - 0s - loss: 0.4666 - acc: 0.7871 - val_loss: 0.4169 - val_acc: 0.8426
Epoch 15/30
 - 0s - loss: 0.4643 - acc: 0.7892 - val_loss: 0.4151 - val_acc: 0.8519
Epoch 16/30
 - 0s - loss: 0.4632 - acc: 0.7892 - val_loss: 0.4134 - val_acc: 0.8426
Epoch 17/30
 - 0s - loss: 0.4618 - acc: 0.7902 - val_loss: 0.4133 - val_acc: 0.8426
Epoch 18/30
 - 0s - loss: 0.4618 - acc: 0.7913 - val_loss: 0.4145 - val_acc: 0.8056
Epoch 19/30
 - 0s - loss: 0.4606 - acc: 0.7944 - val_loss: 0.4160 - val_acc: 0.8426
Epoch 20/30
 - 0s - loss: 0.4606 - acc: 0.7934 - val_loss: 0.4155 - val_acc: 0.8148
Epoch 21/30
 - 0s - loss: 0.4588 - acc: 0.7944 - val_loss: 0.4124 - val_acc: 0.8426
Epoch 22/30
 - 0s - loss: 0.4568 - acc: 0.7954 - val_loss: 0.4136 - val_acc: 0.8426
Epoch 23/30
 - 0s - loss: 0.4571 - acc: 0.7985 - val_loss: 0.4152 - val_acc: 0.8333
Epoch 24/30
 - 0s - loss: 0.4585 - acc: 0.7923 - val_loss: 0.4190 - val_acc: 0.8056
Epoch 25/30
 - 0s - loss: 0.4577 - acc: 0.7923 - val_loss: 0.4162 - val_acc: 0.8426
Epoch 26/30
 - 0s - loss: 0.4610 - acc: 0.7882 - val_loss: 0.4192 - val_acc: 0.8426
Epoch 27/30
 - 0s - loss: 0.4553 - acc: 0.8006 - val_loss: 0.4156 - val_acc: 0.8333
Epoch 28/30
 - 0s - loss: 0.4580 - acc: 0.7902 - val_loss: 0.4186 - val_acc: 0.7963
Epoch 29/30
 - 0s - loss: 0.4590 - acc: 0.7975 - val_loss: 0.4145 - val_acc: 0.8426
Epoch 30/30
 - 0s - loss: 0.4550 - acc: 0.7934 - val_loss: 0.4165 - val_acc: 0.8241
# Evaluate on the held-out test split; returns the loss followed by the
# metrics passed to compile().
scores = model.evaluate(x=test_Features, y= test_Label)
238/238 [==============================] - 0s 21us/step
# Index 1 is the accuracy (index 0 is the loss).
scores[1]
0.8025210089042407
# Two fictional passengers, with values given in the column order used below.
Jack = pd.Series([0, 'Jack', 3, 'male', 23, 1, 0, 5.000, 'S'])
Rose = pd.Series([1, 'Rose', 1, 'female', 20, 1, 0, 100.000, 'S'])
# Build a two-row frame from the series values.
JR_df = pd.DataFrame(
    [Jack.tolist(), Rose.tolist()],
    columns=[
        'survived', 'name', 'pclass', 'sex', 'age', 'sibsp',
        'parch', 'fare', 'embarked'
    ],
)
# Append the two rows to the full data set (original index labels are kept).
all_df = pd.concat([all_df, JR_df])
# Show the last three rows.
all_df[-3:]
survived name pclass sex age sibsp parch fare embarked
1308 0 Zimmerman, Mr. Leo 3 male 29.0 0 0 7.875 S
0 0 Jack 3 male 23.0 1 0 5.000 S
1 1 Rose 1 female 20.0 1 0 100.000 S
# Re-run the preprocessing on the full frame, which now includes the two
# appended rows for Jack and Rose.
all_Features, Label = PreprocessData(all_df)

# Model output (sigmoid activation) for every row of the full frame.
all_probability = model.predict(all_Features)
# Preview the first ten predictions.
all_probability[:10]
array([[0.97387624],
       [0.36760893],
       [0.9653297 ],
       [0.29578814],
       [0.96136355],
       [0.26288155],
       [0.93404984],
       [0.27685004],
       [0.92254674],
       [0.30783302]], dtype=float32)
# NOTE(review): this rebinds the name `pd` from the pandas module to the
# DataFrame, so any later `pd.<function>` call (e.g. pd.Series, pd.concat)
# fails until pandas is imported again. `pd` is an alias of all_df, not a
# copy, so the insert below also adds the column to all_df itself.
pd = all_df
# Append the predictions as a new last column named 'probability'.
pd.insert(len(all_df.columns),
         'probability', all_probability)
# Show the last three rows (~2 == -3).
pd[~2:]
survived name pclass sex age sibsp parch fare embarked probability
1308 0 Zimmerman, Mr. Leo 3 male 29.0 0 0 7.875 S 0.132631
0 0 Jack 3 male 23.0 1 0 5.000 S 0.130663
1 1 Rose 1 female 20.0 1 0 100.000 S 0.963028
# Rows whose 'survived' value is 0 (here `pd` is the DataFrame, not pandas).
pd[(pd['survived'] == 0) ]
survived name pclass sex age sibsp parch fare embarked probability
2 0 Allison, Miss. Helen Loraine 1 female 2.0 1 2 151.5500 S 0.965330
3 0 Allison, Mr. Hudson Joshua Creighton 1 male 30.0 1 2 151.5500 S 0.295788
4 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) 1 female 25.0 1 2 151.5500 S 0.961364
7 0 Andrews, Mr. Thomas Jr 1 male 39.0 0 0 0.0000 S 0.276850
9 0 Artagaveytia, Mr. Ramon 1 male 71.0 0 0 49.5042 C 0.307833
10 0 Astor, Col. John Jacob 1 male 47.0 1 0 227.5250 C 0.382211
15 0 Baumann, Mr. John D 1 male NaN 0 0 25.9250 S 0.303370
16 0 Baxter, Mr. Quigg Edmond 1 male 24.0 0 1 247.5208 C 0.568902
19 0 Beattie, Mr. Thomson 1 male 36.0 0 0 75.2417 C 0.418435
25 0 Birnbaum, Mr. Jakob 1 male 25.0 0 0 26.0000 C 0.446399
30 0 Blackwell, Mr. Stephen Weart 1 male 45.0 0 0 35.5000 S 0.271255
34 0 Borebank, Mr. John James 1 male 42.0 0 0 26.5500 S 0.275931
38 0 Brady, Mr. John Bertram 1 male 41.0 0 0 30.5000 S 0.278998
39 0 Brandeis, Mr. Emil 1 male 48.0 0 0 50.4958 C 0.364540
40 0 Brewe, Dr. Arthur Jackson 1 male NaN 0 0 39.6000 C 0.429713
45 0 Butt, Major. Archibald Willingham 1 male 45.0 0 0 26.5500 S 0.269356
46 0 Cairns, Mr. Alexander 1 male NaN 0 0 31.0000 S 0.304525
51 0 Carlsson, Mr. Frans Olof 1 male 33.0 0 0 5.0000 S 0.291429
52 0 Carrau, Mr. Francisco M 1 male 28.0 0 0 47.1000 S 0.312617
53 0 Carrau, Mr. Jose Pedro 1 male 17.0 0 0 47.1000 S 0.339315
58 0 Case, Mr. Howard Brown 1 male 49.0 0 0 26.0000 S 0.260632
60 0 Cavendish, Mr. Tyrell William 1 male 36.0 1 0 78.8500 S 0.271148
62 0 Chaffee, Mr. Herbert Fuller 1 male 46.0 1 0 61.1750 S 0.246321
70 0 Chisholm, Mr. Roderick Robert Crispin 1 male NaN 0 0 0.0000 S 0.297509
71 0 Clark, Mr. Walter Miller 1 male 27.0 1 0 136.7792 C 0.426140
74 0 Clifford, Mr. George Quincy 1 male NaN 0 0 52.0000 S 0.309330
75 0 Colley, Mr. Edward Pomeroy 1 male 47.0 0 0 25.5875 S 0.264827
77 0 Compton, Mr. Alexander Taylor Jr 1 male 37.0 1 1 83.1583 C 0.365659
80 0 Crafton, Mr. John Bertram 1 male NaN 0 0 26.5500 S 0.303512
81 0 Crosby, Capt. Edward Gifford 1 male 70.0 1 1 71.0000 S 0.200277
... ... ... ... ... ... ... ... ... ... ...
1276 0 Vander Planke, Mrs. Julius (Emelia Maria Vande... 3 female 31.0 1 0 18.0000 S 0.390119
1278 0 Vendel, Mr. Olof Edvin 3 male 20.0 0 0 7.8542 S 0.144151
1279 0 Vestrom, Miss. Hulda Amanda Adolfina 3 female 14.0 0 0 7.8542 S 0.541694
1280 0 Vovk, Mr. Janko 3 male 22.0 0 0 7.8958 S 0.141521
1281 0 Waelens, Mr. Achille 3 male 22.0 0 0 9.0000 S 0.141519
1282 0 Ware, Mr. Frederick 3 male NaN 0 0 8.0500 S 0.131565
1283 0 Warren, Mr. Charles William 3 male NaN 0 0 7.5500 S 0.131566
1284 0 Webber, Mr. James 3 male NaN 0 0 8.0500 S 0.131565
1285 0 Wenzel, Mr. Linhart 3 male 32.5 0 0 9.5000 S 0.128364
1287 0 Widegren, Mr. Carl/Charles Peter 3 male 51.0 0 0 7.7500 S 0.107727
1288 0 Wiklund, Mr. Jakob Alfred 3 male 18.0 1 0 6.4958 S 0.136882
1289 0 Wiklund, Mr. Karl Johan 3 male 21.0 1 0 6.4958 S 0.133120
1291 0 Willer, Mr. Aaron ("Abi Weller") 3 male NaN 0 0 8.7125 S 0.131564
1292 0 Willey, Mr. Edward 3 male NaN 0 0 7.5500 S 0.131566
1293 0 Williams, Mr. Howard Hugh "Harry" 3 male NaN 0 0 8.0500 S 0.131565
1294 0 Williams, Mr. Leslie 3 male 28.5 0 0 16.1000 S 0.133237
1295 0 Windelov, Mr. Einar 3 male 21.0 0 0 7.2500 S 0.142832
1296 0 Wirz, Mr. Albert 3 male 27.0 0 0 8.6625 S 0.135120
1297 0 Wiseman, Mr. Phillippe 3 male NaN 0 0 7.2500 S 0.131567
1298 0 Wittevrongel, Mr. Camille 3 male 36.0 0 0 9.5000 S 0.124216
1299 0 Yasbeck, Mr. Antoni 3 male 27.0 1 0 14.4542 C 0.161984
1301 0 Youseff, Mr. Gerious 3 male 45.5 0 0 7.2250 C 0.147109
1302 0 Yousif, Mr. Wazli 3 male NaN 0 0 7.2250 C 0.169266
1303 0 Yousseff, Mr. Gerious 3 male NaN 0 0 14.4583 C 0.169295
1304 0 Zabour, Miss. Hileni 3 female 14.5 1 0 14.4542 C 0.674486
1305 0 Zabour, Miss. Thamine 3 female NaN 1 0 14.4542 C 0.603369
1306 0 Zakarian, Mr. Mapriededer 3 male 26.5 0 0 7.2250 C 0.174369
1307 0 Zakarian, Mr. Ortin 3 male 27.0 0 0 7.2250 C 0.173603
1308 0 Zimmerman, Mr. Leo 3 male 29.0 0 0 7.8750 S 0.132631
0 0 Jack 3 male 23.0 1 0 5.0000 S 0.130663

810 rows × 10 columns

# Rows with 'survived' == 0 for which the model predicted a probability
# above 0.9 (here `pd` is the DataFrame, not pandas).
pd[(pd['survived'] == 0) & (pd['probability'] > 0.9)]
survived name pclass sex age sibsp parch fare embarked probability
2 0 Allison, Miss. Helen Loraine 1 female 2.0 1 2 151.5500 S 0.965330
4 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) 1 female 25.0 1 2 151.5500 S 0.961364
105 0 Evans, Miss. Edith Corse 1 female 36.0 0 0 31.6792 C 0.973539
169 0 Isham, Miss. Ann Elizabeth 1 female 50.0 0 0 28.7125 C 0.971705
286 0 Straus, Mrs. Isidor (Rosalie Ida Blun) 1 female 63.0 1 0 221.7792 S 0.954021
# Preview the first five rows including the 'probability' column.
pd[:5]
survived name pclass sex age sibsp parch fare embarked probability
0 1 Allen, Miss. Elisabeth Walton 1 female 29.0000 0 0 211.3375 S 0.973876
1 1 Allison, Master. Hudson Trevor 1 male 0.9167 1 2 151.5500 S 0.367609
2 0 Allison, Miss. Helen Loraine 1 female 2.0000 1 2 151.5500 S 0.965330
3 0 Allison, Mr. Hudson Joshua Creighton 1 male 30.0000 1 2 151.5500 S 0.295788
4 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) 1 female 25.0000 1 2 151.5500 S 0.961364

你可能感兴趣的:(TensorFlow)