import json
import logging
import os
import pickle
import re
import sys
from glob import glob

import gridfs
import numpy as np
import pandas as pd
import sklearn
import sklearn.metrics
import sklearn_pandas
from bson.objectid import ObjectId
from future.utils import with_metaclass
from pymongo import MongoClient

from sklearn import metrics
from sklearn.calibration import *
from sklearn.ensemble import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.externals import joblib
from sklearn.linear_model import *
from sklearn.metrics import make_scorer, mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline

from keras import backend as K
from keras.layers import (Input, Dense, Activation, Embedding, LSTM,
                          Conv2D, MaxPooling2D, Flatten, TimeDistributed)
from keras.layers.wrappers import Wrapper
from keras.models import Model, Sequential
from keras.optimizers import SGD, Adagrad

from thrift import Thrift
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket, TTransport

from modeldb.basic import *
from modeldb.basic.Structs import (Model, ModelConfig, ModelMetrics, Dataset)
from modeldb.events import *
from modeldb.thrift.modeldb import ModelDBService
from modeldb.thrift.modeldb import ttypes as modeldb_types
from modeldb.thrift.modeldb.ttypes import ExperimentRun, Experiment, Project
from modeldb.utils.Singleton import Singleton

logger = logging.getLogger(__name__)
def fit_fn(self, x_train, y_train, epochs=5, batch_size=32, **params):
    """Fits the keras model and buffers a FitEvent for ModelDB."""
    logger.info("fitting keras model")
    history = self.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)
    # **params is always a dict, so check for emptiness rather than None.
    if not params:
        params = {'epochs': epochs, 'batch_size': batch_size}
    fit_event = FitEvent(history, self, x_train, params)
    Syncer.instance.add_to_buffer(event=fit_event)
    return history
def compile_fn(self, loss='categorical_crossentropy', optimizer=None, metrics=['accuracy']):
    """Compiles the keras model in place (keras compile() itself returns None)."""
    logger.info("compiling the model")
    if optimizer is None:
        optimizer = SGD(lr=0.01, momentum=0.9, nesterov=True)
    self.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    # transform_event = TransformEvent()
    # Syncer.instance.add_to_buffer(event=transform_event)
    return self
def train_batch_fn(self, x_batch, y_batch):
    """Runs a single gradient update on one batch of data."""
    logger.info("training on a single batch")
    # train_on_batch returns the scalar loss (or a list of loss/metric values).
    batch_metrics = self.train_on_batch(x_batch, y_batch)
    # transform_event = TransformEvent()
    # Syncer.instance.add_to_buffer(event=transform_event)
    return batch_metrics
def convert_prediction_to_event(model, predict_array, x):
    """Joins the predictions onto the input data and buffers a TransformEvent."""
    predict_df = pd.DataFrame(predict_array)
    # Assign names to the predicted columns so the join cannot produce
    # column-name conflicts.
    num_pred_cols = predict_df.shape[1]
    pred_col_names = ['pred_' + str(i) for i in range(num_pred_cols)]
    predict_df.columns = pred_col_names
    if not isinstance(x, pd.DataFrame):
        new_df = pd.DataFrame(x).join(predict_df)
    else:
        new_df = x.join(predict_df)
    predict_event = TransformEvent(x, new_df, model)
    Syncer.instance.add_to_buffer(predict_event)
    return predict_array
def predict_fn(self, x_test, batch_size=128):
    """Predicts with the keras model and buffers the result as a TransformEvent."""
    logger.info("predicting with the model")
    predict_array = self.predict(x_test, batch_size=batch_size)
    return convert_prediction_to_event(self, predict_array, x_test)
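# Illustration only (not part of the original module): how predictions are
# renamed to pred_0, pred_1, ... and joined onto the input frame by
# convert_prediction_to_event. Uses only pandas/numpy and does no ModelDB
# syncing; the data values are arbitrary.
def _example_prediction_join():
    x = pd.DataFrame({'f0': [0.1, 0.2], 'f1': [1.0, 2.0]})
    preds = np.array([[0.9, 0.1], [0.3, 0.7]])
    predict_df = pd.DataFrame(preds, columns=['pred_0', 'pred_1'])
    joined = x.join(predict_df)  # columns: f0, f1, pred_0, pred_1
    return joined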
def compute_roc_auc_sync(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Computes ROC AUC and buffers a MetricEvent."""
    roc_auc = metrics.roc_auc_score(test_y, y_pred)
    logger.info("roc_auc_score is %s", roc_auc)
    metrics_event = MetricEvent(df, self, label_col, prediction_col,
                                metrics.roc_auc_score.__name__, roc_auc)
    Syncer.instance.add_to_buffer(metrics_event)
    return roc_auc
def compute_mean_absolute_error(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Computes mean absolute error and buffers a MetricEvent."""
    mae = mean_absolute_error(test_y, y_pred)
    logger.info("mean_absolute_error is %s", mae)
    metrics_event = MetricEvent(df, self, label_col, prediction_col,
                                mean_absolute_error.__name__, mae)
    Syncer.instance.add_to_buffer(metrics_event)
    return mae
def compute_mean_squared_error(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Computes mean squared error and buffers a MetricEvent."""
    mse = mean_squared_error(test_y, y_pred)
    metrics_event = MetricEvent(df, self, label_col, prediction_col,
                                mean_squared_error.__name__, mse)
    Syncer.instance.add_to_buffer(metrics_event)
    return mse
def compute_accuracy_score_sync(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Thresholds probabilities at 0.5, computes accuracy and buffers a MetricEvent."""
    y_pred_binary = (y_pred >= 0.5) * 1
    accuracy_score = metrics.accuracy_score(test_y, y_pred_binary)
    metrics_event = MetricEvent(df, self, label_col, prediction_col,
                                metrics.accuracy_score.__name__, accuracy_score)
    Syncer.instance.add_to_buffer(metrics_event)
    return accuracy_score
def compute_recall_score_sync(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Thresholds probabilities at 0.5, computes recall and buffers a MetricEvent."""
    y_pred_binary = (y_pred >= 0.5) * 1
    recall_score = metrics.recall_score(test_y, y_pred_binary)
    metrics_event = MetricEvent(df, self, label_col, prediction_col,
                                metrics.recall_score.__name__, recall_score)
    Syncer.instance.add_to_buffer(metrics_event)
    return recall_score
def compute_precision_score_sync(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Thresholds probabilities at 0.5, computes precision and buffers a MetricEvent."""
    y_pred_binary = (y_pred >= 0.5) * 1
    precision_score = metrics.precision_score(test_y, y_pred_binary)
    metrics_event = MetricEvent(df, self, label_col, prediction_col,
                                metrics.precision_score.__name__, precision_score)
    Syncer.instance.add_to_buffer(metrics_event)
    return precision_score
def compute_f1_score_sync(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Thresholds probabilities at 0.5, computes F1 and buffers a MetricEvent."""
    y_pred_binary = (y_pred >= 0.5) * 1
    f1_score = metrics.f1_score(test_y, y_pred_binary)
    metrics_event = MetricEvent(df, self, label_col, prediction_col,
                                metrics.f1_score.__name__, f1_score)
    Syncer.instance.add_to_buffer(metrics_event)
    return f1_score
switch = {
    'roc_auc': compute_roc_auc_sync,
    'f1_score': compute_f1_score_sync,
    'precision_score': compute_precision_score_sync,
    'recall_score': compute_recall_score_sync,
    'accuracy_score': compute_accuracy_score_sync,
    'mean_squared_error': compute_mean_squared_error,
    'mean_absolute_error': compute_mean_absolute_error
}
def metrics_fn(self, metric_func, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Looks up the metric by name in the switch table, computes it and syncs it."""
    logger.info("computing metric %s for the model", metric_func)
    try:
        return switch[metric_func](self, test_y, y_pred, df, prediction_col, label_col)
    except KeyError as e:
        logger.error("unsupported metric %s: %s", metric_func, str(e))
        return None
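# Illustration only (not part of the original module): how the switch-table
# dispatch is expected to be driven through the patched metrics_sync method
# once a Syncer exists. The model/data names below are hypothetical.
def _example_metric_dispatch(model, test_df, test_y, y_pred):
    # 'accuracy_score' must be one of the keys of the switch table above;
    # unknown metric names are logged as errors and return None.
    return model.metrics_sync('accuracy_score', test_y, y_pred, test_df,
                              prediction_col='pred_0', label_col='label')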
def get_model_struct(model, show_layer_names=True, show_shapes=True, is_json=False):
    """Builds a dict (or JSON string) describing each layer of a keras model."""
    struct_dict = dict()
    for layer in model.layers:
        z_dict = dict()
        # Not every layer type has an activation (e.g. Flatten, MaxPooling2D).
        activation = getattr(layer, 'activation', None)
        if activation is not None:
            z_dict['activation'] = activation.__name__
        layer_name = layer.name
        class_name = layer.__class__.__name__
        # Append a wrapped layer's name and class to the labels, if it exists.
        if isinstance(layer, Wrapper):
            layer_name = '{}({})'.format(layer_name, layer.layer.name)
            child_class_name = layer.layer.__class__.__name__
            class_name = '{}({})'.format(class_name, child_class_name)
            z_dict['layer_child_type'] = child_class_name
        z_dict['class_name'] = class_name
        if show_layer_names:
            z_dict['layer_name'] = layer_name
            z_dict['layer_type'] = class_name
        # Record input/output shapes.
        if show_shapes:
            try:
                outputlabels = str(layer.output_shape)
            except AttributeError:
                outputlabels = 'multiple'
            if hasattr(layer, 'input_shape'):
                inputlabels = str(layer.input_shape)
            elif hasattr(layer, 'input_shapes'):
                inputlabels = ', '.join(
                    [str(ishape) for ishape in layer.input_shapes])
            else:
                inputlabels = 'multiple'
            z_dict['input_shape'] = inputlabels
            z_dict['output_shape'] = outputlabels
        struct_dict[layer_name] = z_dict
    struct_dict['loss'] = str(model.loss)
    struct_dict['metrics'] = str(model.metrics)
    struct_dict['optimizer'] = str(model.optimizer.__class__)
    logger.debug(struct_dict)
    if is_json:
        return json.dumps(struct_dict)
    return struct_dict
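# A minimal sketch (not part of the original module) of what get_model_struct
# returns for a tiny compiled Sequential model; the layer sizes are arbitrary.
def _example_model_struct():
    model = Sequential()
    model.add(Dense(8, activation='relu', input_shape=(4,)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.01),
                  metrics=['accuracy'])
    # Returns a dict keyed by layer name with activation, class_name and
    # input/output shapes, plus the model-level loss/metrics/optimizer.
    return get_model_struct(model, is_json=False)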
class Syncer(with_metaclass(Singleton, ModelDbSyncerBase.Syncer)):
    """Singleton syncer that patches keras models with the *_sync methods."""
    instance = None

    def __init__(self, project_config, experiment_config, experiment_run_config,
                 thrift_config=None):
        self.enable_keras_fn()
        self.local_id_to_path = {}
        Syncer.instance = self
        super(Syncer, self).__init__(project_config, experiment_config,
                                     experiment_run_config, thrift_config)

    def __str__(self):
        return "keras_syncer"

    def enable_keras_fn(self):
        """Attaches the sync wrappers to keras Model and Sequential."""
        from keras.models import Model, Sequential
        for cls in [Model, Sequential]:
            setattr(cls, "fit_sync", fit_fn)
            setattr(cls, "compile_sync", compile_fn)
            setattr(cls, "predict_sync", predict_fn)
            setattr(cls, "metrics_sync", metrics_fn)
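# A minimal end-to-end usage sketch (not part of the original module). It
# assumes a reachable ModelDB server and that the project/experiment/run
# configs are built with the ModelDB client's Structs helpers (e.g.
# NewOrExistingProject, DefaultExperiment, NewExperimentRun); those helper
# names and the final sync() call come from the base Syncer and are
# assumptions, not defined in this file.
def _example_end_to_end(project_config, experiment_config, experiment_run_config,
                        x_train, y_train, x_test, y_test):
    syncer = Syncer(project_config, experiment_config, experiment_run_config)
    model = Sequential()
    model.add(Dense(1, activation='sigmoid', input_shape=(x_train.shape[1],)))
    # The patched *_sync methods buffer events on Syncer.instance.
    model.compile_sync(loss='binary_crossentropy')
    model.fit_sync(x_train, y_train, epochs=2, batch_size=32)
    y_pred = model.predict_sync(x_test)
    model.metrics_sync('accuracy_score', y_test, y_pred, pd.DataFrame(x_test),
                       prediction_col='pred_0', label_col='label')
    syncer.sync()  # assumed: flushes the buffered events to ModelDB
    return model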