用ANN预测客户流失数据

这次我们用python来对客户流失数据进行预测
相关完整代码与csv文件可以从我的GitHub地址获取
https://github.com/liuzuoping/Deep_Learning_note

读取客户流失数据

import pandas
df = pandas.read_csv('../data/customer_churn.csv', index_col=0, header = 0)
df.head()

用ANN预测客户流失数据_第1张图片

数据前处理

cat_var = ['international_plan','voice_mail_plan', 'churn']

for var in cat_var:
    df[var] = df[var].map(lambda e: 1 if e == 'yes' else 0)
df.head()
df.info()

用ANN预测客户流失数据_第2张图片
用ANN预测客户流失数据_第3张图片

区分训练与测试数据集

y = df.iloc[:,-1]
x = df.iloc[:,:-1]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.33, random_state = 123)

尺度标准化

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

训练ANN

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
def tarinProcess(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu', input_dim = 16))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    history = classifier.fit(x_train, y_train,
                        batch_size=10,
                        epochs=100,
                        verbose=0,
                        validation_data=(x_test, y_test))
    return history
history1 = tarinProcess('sgd')
history2 = tarinProcess('RMSprop')
history3 = tarinProcess('Adagrad')
history4 = tarinProcess('Adadelta')
history5 = tarinProcess('Adam')

K-fold 交叉验证

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
def buildClassifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu', input_dim = 16))
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return classifier
classifier = KerasClassifier(build_fn = buildClassifier, batch_size = 10, epochs = 100, optimizer = 'adam')
accuracies = cross_val_score(estimator = classifier, X = x_train, y = y_train, cv = 5)
mean = accuracies.mean()
variance = accuracies.std()

用ANN预测客户流失数据_第4张图片

accuracies
mean
variance

array([0.91051453, 0.91498882, 0.88143176, 0.85874438, 0.9304933 ])
0.8992345571517945
0.02572522922509814

Dropout

from keras.layers import Dropout
def buildClassifierWithDropout(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu', input_dim = 16))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return classifier
classifier = KerasClassifier(build_fn = buildClassifierWithDropout, batch_size = 10, epochs = 100, verbose = 0, optimizer='adam' )
accuracies = cross_val_score(estimator = classifier, X = x_train, y = y_train, cv = 5)
mean = accuracies.mean()
variance = accuracies.std()
accuracies
mean
variance

array([0.85458612, 0.89038032, 0.87695748, 0.85201794, 0.86547083])
0.8678825378417969
0.01430245638705941

Grid Search

from sklearn.model_selection import GridSearchCV
classifier = KerasClassifier(build_fn = buildClassifierWithDropout, epochs = 100)
parameters = {'batch_size': [10, 15],
              'optimizer': ['adam', 'rmsprop']}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 5)
grid_search     = grid_search.fit(x_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy   = grid_search.best_score_

用ANN预测客户流失数据_第5张图片

你可能感兴趣的:(Python机器学习与深度学习)