In this post we use Python to predict customer churn.
The complete code and the CSV file are available from my GitHub repository:
https://github.com/liuzuoping/Deep_Learning_note
import pandas
# load the churn data; the first column of the CSV is used as the row index
df = pandas.read_csv('../data/customer_churn.csv', index_col=0, header=0)
df.head()
# encode the yes/no columns (including the churn label) as 1/0
cat_var = ['international_plan', 'voice_mail_plan', 'churn']
for var in cat_var:
    df[var] = df[var].map(lambda e: 1 if e == 'yes' else 0)
df.head()
df.info()
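Churn data sets are usually imbalanced, so before modelling it is worth checking how many customers actually churned. A quick sketch (this check is my own addition; the churn column has just been recoded to 1/0 above):
# fraction of churned (1) versus retained (0) customers
df['churn'].value_counts(normalize=True)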
# the label (churn) is the last column; everything else is a feature
y = df.iloc[:, -1]
x = df.iloc[:, :-1]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.33, random_state = 123)
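Because churners are the minority class, an optional refinement (not used for the results reported below) is to stratify the split so that both partitions keep the same churn rate:
# alternative split that preserves the churn ratio in train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=123, stratify=y)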
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)  # reuse the training-set mean/std; do not refit the scaler on the test set
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
def trainProcess(optimizer):
    # one hidden layer of 8 relu units on the 16 input features, sigmoid output for churn probability
    classifier = Sequential()
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu', input_dim=16))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                       optimizer=optimizer,
                       metrics=['accuracy'])
    history = classifier.fit(x_train, y_train,
                             batch_size=10,
                             epochs=100,
                             verbose=0,
                             validation_data=(x_test, y_test))
    return history
history1 = trainProcess('sgd')
history2 = trainProcess('RMSprop')
history3 = trainProcess('Adagrad')
history4 = trainProcess('Adadelta')
history5 = trainProcess('Adam')
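To compare the five optimizers, one option is to plot the per-epoch validation accuracy stored in each History object. A minimal sketch with matplotlib (the label list and the key check are my own additions; older Keras versions record the metric as 'val_acc', newer ones as 'val_accuracy'):
import matplotlib.pyplot as plt

histories = [history1, history2, history3, history4, history5]
labels = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam']
for history, label in zip(histories, labels):
    # pick whichever validation-accuracy key this Keras version uses
    key = 'val_accuracy' if 'val_accuracy' in history.history else 'val_acc'
    plt.plot(history.history[key], label=label)
plt.xlabel('epoch')
plt.ylabel('validation accuracy')
plt.legend()
plt.show()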
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
def buildClassifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu', input_dim=16))
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                       optimizer=optimizer,
                       metrics=['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn=buildClassifier, batch_size=10, epochs=100, optimizer='adam')
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=5)
mean = accuracies.mean()
variance = accuracies.std()  # note: .std() returns the standard deviation, not the variance
accuracies
# array([0.91051453, 0.91498882, 0.88143176, 0.85874438, 0.9304933 ])
mean
# 0.8992345571517945
variance
# 0.02572522922509814
from keras.layers import Dropout

def buildClassifierWithDropout(optimizer):
    # same architecture as buildClassifier, with 10% dropout after each hidden layer
    classifier = Sequential()
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu', input_dim=16))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dropout(rate=0.1))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(loss='binary_crossentropy',
                       optimizer=optimizer,
                       metrics=['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn=buildClassifierWithDropout, batch_size=10, epochs=100, verbose=0, optimizer='adam')
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=5)
mean = accuracies.mean()
variance = accuracies.std()  # again, this is the standard deviation
accuracies
# array([0.85458612, 0.89038032, 0.87695748, 0.85201794, 0.86547083])
mean
# 0.8678825378417969
variance
# 0.01430245638705941
With dropout, the mean cross-validation accuracy comes out slightly lower here (about 0.868 versus 0.899), but the scores vary less across folds (standard deviation about 0.014 versus 0.026).
from sklearn.model_selection import GridSearchCV
classifier = KerasClassifier(build_fn=buildClassifierWithDropout, epochs=100)
parameters = {'batch_size': [10, 15],
              'optimizer': ['adam', 'rmsprop']}
grid_search = GridSearchCV(estimator=classifier,
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=5)
grid_search = grid_search.fit(x_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
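Finally, the winning configuration can be inspected and, since GridSearchCV refits the best model on the full training set by default, scored on the held-out test set. A small sketch (the prints and the use of best_estimator_ are my own additions):
print('best parameters:', best_parameters)
print('best cross-validation accuracy:', best_accuracy)

# refit=True is the GridSearchCV default, so best_estimator_ is already trained
# on all of x_train and can be evaluated on the untouched test split
test_accuracy = grid_search.best_estimator_.score(x_test, y_test)
print('test accuracy:', test_accuracy)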