# keras-syncer

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import make_scorer, mean_absolute_error, mean_squared_error
import  logging
import sys
import pandas as pd
from future.utils import with_metaclass
from sklearn.linear_model import *

from sklearn.calibration import *
from sklearn import  metrics
from modeldb.thrift.modeldb.ttypes import  ExperimentRun ,Project,Experiment
from modeldb.basic.Structs import (
            Model, ModelConfig, ModelMetrics, Dataset)
from sklearn.ensemble import *
from sklearn.pipeline import Pipeline
from modeldb.utils.Singleton import Singleton
from modeldb.basic import *
from modeldb.events import *

from modeldb.thrift.modeldb import  ttypes as modeldb_types
from modeldb.thrift.modeldb import  ModelDBService
# from ..basic import *
# from ..events import *
# from ..thrift.modeldb import ModelDBService
# from ..thrift.modeldb import ttypes as modeldb_types

from pymongo import MongoClient
import  gridfs
from bson.objectid import ObjectId
import  logging
import sklearn.metrics
from sklearn.externals import joblib
import sklearn, sklearn_pandas

import os
import re
from glob import glob
import pickle
# from ..events import FitEvent ,TransformEvent,PipelineEvent,GridSearchCVEvent,MetricEvent,RandomSplitEvent,ExperimentEvent

import numpy as np
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol

from keras.layers import Input,Dense, Activation, Embedding, LSTM
from keras.layers import Conv2D, MaxPooling2D, Flatten,TimeDistributed
from keras import backend as K
from keras.models import Sequential,Model
from keras.optimizers import SGD,Adagrad
import logging

logger=logging.getLogger(__name__)

def fit_fn(self, x_train, y_train, epochs=5, batch_size=32, **params):
    """Fit the model and queue a FitEvent on the active syncer.

    Installed on Keras model classes as ``fit_sync``.

    Args:
        self: The object whose ``fit`` method is invoked.
        x_train: Training inputs, passed straight to ``self.fit``.
        y_train: Training targets, passed straight to ``self.fit``.
        epochs: Forwarded to ``self.fit``.
        batch_size: Forwarded to ``self.fit``.
        **params: Extra key/value pairs recorded on the FitEvent. They are
            NOT forwarded to ``self.fit``.

    Returns:
        Whatever ``self.fit`` returns.
    """
    logger.info("fit model for keras")
    model = self.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)
    # ``**params`` is always a dict (never None), so test for emptiness to
    # fall back to recording the hyper-parameters that were actually used.
    if not params:
        params = {'epochs': epochs, 'batch_size': batch_size}
    fit_event = FitEvent(model, self, x_train, params)
    Syncer.instance.add_to_buffer(event=fit_event)
    return model


def compile_fn(self, loss='categorical_crossentropy', optimizer=None, metrics=['accuracy']):
    """Compile the model with the given loss, optimizer and metrics.

    Installed on Keras model classes as ``compile_sync``. No event is
    queued on the syncer by this function.

    Args:
        self: The object whose ``compile`` method is invoked.
        loss: Forwarded to ``self.compile``.
        optimizer: Forwarded to ``self.compile``. When omitted, a fresh
            ``SGD(lr=0.01, momentum=0.9, nesterov=True)`` is created for
            this call.
        metrics: Forwarded to ``self.compile`` (as a copy when it is a list).

    Returns:
        Whatever ``self.compile`` returns.
    """
    logger.info("compile the model")
    if optimizer is None:
        # Build the default optimizer per call: a default-argument instance
        # would be created once at definition time and shared by every model
        # compiled without an explicit optimizer.
        from keras.optimizers import SGD
        optimizer = SGD(lr=0.01, momentum=0.9, nesterov=True)
    if isinstance(metrics, list):
        # Hand over a copy so the shared default list can never be mutated.
        metrics = list(metrics)

    model = self.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    return model


def train_batch_fn(self, x_batch, y_batch):
    """Run a single ``train_on_batch`` step and return its result.

    Args:
        self: The object whose ``train_on_batch`` method is invoked.
        x_batch: Batch inputs, passed through unchanged.
        y_batch: Batch targets, passed through unchanged.

    Returns:
        Whatever ``self.train_on_batch`` returns. No syncer event is queued.
    """
    logger.info("batch train the dataset")
    return self.train_on_batch(x_batch, y_batch)

def convert_prediction_to_event(model, predict_array, x):
    """Queue a TransformEvent describing a prediction and return the prediction.

    The predictions are turned into a DataFrame with columns ``pred_0``,
    ``pred_1``, ... and joined onto the input data; the joined frame is the
    "output" side of the TransformEvent.

    Args:
        model: The model that produced ``predict_array``.
        predict_array: Prediction output; anything ``pd.DataFrame`` accepts.
        x: The input the predictions were computed from (DataFrame or any
            value ``pd.DataFrame`` accepts).

    Returns:
        ``predict_array``, unchanged.
    """
    predict_df = pd.DataFrame(predict_array)
    # Name the predicted columns explicitly so they cannot clash with the
    # input's own column labels when joining.
    predict_df.columns = ['pred_' + str(i) for i in range(predict_df.shape[1])]

    x_df = x if isinstance(x, pd.DataFrame) else pd.DataFrame(x)
    # ``DataFrame.join`` aligns on the index. Predictions are positional, so
    # give them the input's index; otherwise an input with a non-default
    # index (e.g. a shuffled train/test split) gets NaN or misplaced rows.
    if len(predict_df) == len(x_df):
        predict_df.index = x_df.index
    new_df = x_df.join(predict_df)

    predict_event = TransformEvent(x, new_df, model)
    Syncer.instance.add_to_buffer(predict_event)
    return predict_array


def predict_fn(self, x_test, batch_size=128):
    """Predict on ``x_test`` and record the prediction via the syncer.

    Installed on Keras model classes as ``predict_sync``.

    Args:
        self: The object whose ``predict`` method is invoked.
        x_test: Inputs to predict on.
        batch_size: Forwarded to ``self.predict``.

    Returns:
        The value returned by ``convert_prediction_to_event`` for this
        prediction.
    """
    logger.info("predict use the model")
    predictions = self.predict(x_test, batch_size=batch_size)
    return convert_prediction_to_event(self, predictions, x_test)


def compute_roc_auc_sync(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Compute ``metrics.roc_auc_score`` and queue a MetricEvent for it.

    Args:
        self: The model the metric belongs to (stored on the event).
        test_y: True labels.
        y_pred: Predicted scores, passed to the scorer unchanged.
        df: Data associated with the metric (stored on the event).
        prediction_col: Prediction column name (stored on the event).
        label_col: Label column name (stored on the event).
        **params: Accepted for signature compatibility; unused.

    Returns:
        The computed score.
    """
    scorer = metrics.roc_auc_score
    score = scorer(test_y, y_pred)
    print("compute is " + str(score))
    Syncer.instance.add_to_buffer(
        MetricEvent(df, self, label_col, prediction_col, scorer.__name__, score))
    return score

def compute_mean_absolute_error(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Compute ``mean_absolute_error`` and queue a MetricEvent for it.

    Args:
        self: The model the metric belongs to (stored on the event).
        test_y: True target values.
        y_pred: Predicted values.
        df: Data associated with the metric (stored on the event).
        prediction_col: Prediction column name (stored on the event).
        label_col: Label column name (stored on the event).
        **params: Accepted for signature compatibility; unused.

    Returns:
        The computed error.
    """
    score = mean_absolute_error(test_y, y_pred=y_pred)
    print("compute is " + str(score))
    Syncer.instance.add_to_buffer(
        MetricEvent(df, self, label_col, prediction_col,
                    mean_absolute_error.__name__, score))
    return score

def compute_mean_squared_error(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Compute ``mean_squared_error`` and queue a MetricEvent for it.

    Args:
        self: The model the metric belongs to (stored on the event).
        test_y: True target values.
        y_pred: Predicted values.
        df: Data associated with the metric (stored on the event).
        prediction_col: Prediction column name (stored on the event).
        label_col: Label column name (stored on the event).
        **params: Accepted for signature compatibility; unused.

    Returns:
        The computed error.
    """
    score = mean_squared_error(test_y, y_pred=y_pred)
    Syncer.instance.add_to_buffer(
        MetricEvent(df, self, label_col, prediction_col,
                    mean_squared_error.__name__, score))
    return score


def compute_accuracy_score_sync(self, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Compute ``metrics.accuracy_score`` on thresholded predictions.

    Predictions are binarised at 0.5 (``>= 0.5`` becomes 1, otherwise 0)
    before scoring, and a MetricEvent is queued on the active syncer.

    Args:
        self: The model the metric belongs to (stored on the event).
        test_y: True labels.
        y_pred: Predicted scores; must support ``>=`` against a float and
            multiplication by an int.
        df: Data associated with the metric (stored on the event).
        prediction_col: Prediction column name (stored on the event).
        label_col: Label column name (stored on the event).
        **params: Accepted for signature compatibility; unused.

    Returns:
        The computed accuracy.
    """
    scorer = metrics.accuracy_score
    thresholded = (y_pred >= 0.5) * 1
    score = scorer(test_y, thresholded)
    Syncer.instance.add_to_buffer(
        MetricEvent(df, self, label_col, prediction_col, scorer.__name__, score))
    return score

def compute_recall_score_sync(self, test_y, y_pred, df, prediction_col='', lable_col='', **params):
    """Compute ``metrics.recall_score`` on thresholded predictions.

    Predictions are binarised at 0.5 (``>= 0.5`` becomes 1, otherwise 0)
    before scoring, and a MetricEvent is queued on the active syncer.

    Args:
        self: The model the metric belongs to (stored on the event).
        test_y: True labels.
        y_pred: Predicted scores; must support ``>=`` against a float and
            multiplication by an int.
        df: Data associated with the metric (stored on the event).
        prediction_col: Prediction column name (stored on the event).
        lable_col: Label column name (stored on the event). The spelling is
            part of the existing keyword interface.
        **params: Accepted for signature compatibility; unused.

    Returns:
        The computed recall.
    """
    scorer = metrics.recall_score
    thresholded = (y_pred >= 0.5) * 1
    score = scorer(test_y, thresholded)
    Syncer.instance.add_to_buffer(
        MetricEvent(df, self, lable_col, prediction_col, scorer.__name__, score))
    return score

def compute_precision_score_sync(self, test_y, y_pred, df, prediction_col='', lable_col='', **params):
    """Compute ``metrics.precision_score`` on thresholded predictions.

    Predictions are binarised at 0.5 (``>= 0.5`` becomes 1, otherwise 0)
    before scoring, and a MetricEvent is queued on the active syncer.

    Args:
        self: The model the metric belongs to (stored on the event).
        test_y: True labels.
        y_pred: Predicted scores; must support ``>=`` against a float and
            multiplication by an int.
        df: Data associated with the metric (stored on the event).
        prediction_col: Prediction column name (stored on the event).
        lable_col: Label column name (stored on the event). The spelling is
            part of the existing keyword interface.
        **params: Accepted for signature compatibility; unused.

    Returns:
        The computed precision.
    """
    y_pred_binary = (y_pred >= 0.5) * 1
    precision_score = metrics.precision_score(test_y, y_pred_binary)
    metrics_event = MetricEvent(df, self, lable_col, prediction_col,
                                metrics.precision_score.__name__, precision_score)
    Syncer.instance.add_to_buffer(metrics_event)
    # Diagnostic output goes to the module logger instead of stdout. The
    # buffer is read defensively from the active instance because this
    # module does not itself define a ``buffer_list`` attribute.
    logger.debug("syncer buffer after precision event: %s",
                 getattr(Syncer.instance, 'buffer_list', None))
    return precision_score

def compute_f1_score_sync(self, test_y, y_pred, df, prediction_col='', lable_col='', **params):
    """Compute ``metrics.f1_score`` on thresholded predictions.

    Predictions are binarised at 0.5 (``>= 0.5`` becomes 1, otherwise 0)
    before scoring, and a MetricEvent is queued on the active syncer.

    Args:
        self: The model the metric belongs to (stored on the event).
        test_y: True labels.
        y_pred: Predicted scores; must support ``>=`` against a float and
            multiplication by an int.
        df: Data associated with the metric (stored on the event).
        prediction_col: Prediction column name (stored on the event).
        lable_col: Label column name (stored on the event). The spelling is
            part of the existing keyword interface.
        **params: Accepted for signature compatibility; unused.

    Returns:
        The computed F1 score.
    """
    scorer = metrics.f1_score
    thresholded = (y_pred >= 0.5) * 1
    score = scorer(test_y, thresholded)
    Syncer.instance.add_to_buffer(
        MetricEvent(df, self, lable_col, prediction_col, scorer.__name__, score))
    return score

# Dispatch table used by ``metrics_fn``: metric name -> function that computes
# the metric and queues the matching MetricEvent.
switch = {
    'roc_auc': compute_roc_auc_sync,
    'f1_score': compute_f1_score_sync,
    'precision_score': compute_precision_score_sync,
    'recall_score': compute_recall_score_sync,
    'accuracy_score': compute_accuracy_score_sync,
    'mean_squared_error': compute_mean_squared_error,
    'mean_absolute_error': compute_mean_absolute_error,
}
def metrics_fn(self, metric_func, test_y, y_pred, df, prediction_col='', label_col='', **params):
    """Compute a named metric and record it through the syncer.

    Installed on Keras model classes as ``metrics_sync``. The metric name is
    looked up in the module-level ``switch`` table and the matching function
    computes the score and queues the MetricEvent.

    Args:
        self: The model the metric belongs to.
        metric_func: Metric name; must be a key of ``switch``.
        test_y: True labels / target values.
        y_pred: Predictions.
        df: Data associated with the metric.
        prediction_col: Prediction column name.
        label_col: Label column name.
        **params: Forwarded to the selected metric function.

    Returns:
        The computed score, or ``None`` when ``metric_func`` is not a
        supported metric name (the problem is logged, nothing is queued).
    """
    logger.info("metrics the model ")
    # Look the name up separately from the call so that a KeyError raised
    # inside the metric computation itself is not mistaken for an unknown
    # metric name and silently swallowed.
    compute = switch.get(metric_func)
    if compute is None:
        logger.error("unsupported metric %r; expected one of %s",
                     metric_func, sorted(switch))
        return None
    return compute(self, test_y, y_pred, df, prediction_col, label_col, **params)
import  json
from  keras.layers.wrappers import Wrapper


def get_model_struct(model, show_layer_names=True, show_shapes=True, is_json=False):
    """Summarise a model's layers and compile settings as a dict or JSON.

    The result maps each layer name to a small dict describing that layer,
    plus three top-level entries: ``'loss'``, ``'metrics'`` and
    ``'optimazer'`` (key spelling kept for existing consumers).

    Per-layer entries:
        * ``activation``: name of the layer's activation, or ``None`` when
          the layer has no ``activation`` attribute.
        * ``layer_child_type`` / ``class_name``: only for ``Wrapper`` layers.
        * ``layer_name`` / ``layer_type``: only when ``show_layer_names``.
        * ``input_shape`` / ``output_shape``: only when ``show_shapes``.

    Args:
        model: Object exposing ``layers``, ``loss``, ``metrics`` and
            ``optimizer``.
        show_layer_names: Include the layer name and type in each entry.
        show_shapes: Include stringified input/output shapes in each entry.
        is_json: Return a JSON string instead of a dict.

    Returns:
        The structure as a ``dict``, or as a JSON ``str`` when ``is_json``.
    """
    struct_dict = dict()
    for layer in model.layers:
        logger.debug("layer attributes: %s", vars(layer))
        z_dict = dict()

        # Not every layer has an activation (e.g. pooling or flatten layers),
        # so do not assume the attribute exists.
        activation = getattr(layer, 'activation', None)
        if activation is None:
            z_dict["activation"] = None
        else:
            z_dict["activation"] = getattr(
                activation, '__name__', activation.__class__.__name__)

        # Append a wrapped layer's label to the layer's own, if it exists.
        layer_name = layer.name
        class_name = layer.__class__.__name__
        if isinstance(layer, Wrapper):
            layer_name = '{}({})'.format(layer_name, layer.layer.name)
            child_class_name = layer.layer.__class__.__name__
            class_name = '{}({})'.format(class_name, child_class_name)
            z_dict['layer_child_type'] = child_class_name
            z_dict['class_name'] = class_name

        if show_layer_names:
            z_dict['layer_name'] = layer_name
            z_dict['layer_type'] = class_name

        if show_shapes:
            try:
                outputlabels = str(layer.output_shape)
            except AttributeError:
                outputlabels = 'multiple'
            if hasattr(layer, 'input_shape'):
                inputlabels = str(layer.input_shape)
            elif hasattr(layer, 'input_shapes'):
                inputlabels = ', '.join(
                    [str(ishape) for ishape in layer.input_shapes])
            else:
                inputlabels = 'multiple'
            z_dict['input_shape'] = inputlabels
            z_dict['output_shape'] = outputlabels

        # Record the layer regardless of ``show_shapes``; that flag only
        # controls whether the shape fields are present.
        struct_dict[layer_name] = z_dict

    struct_dict['loss'] = str(model.loss)
    struct_dict['metrics'] = str(model.metrics)
    struct_dict['optimazer'] = str(model.optimizer.__class__)

    logger.debug("model structure: %s", struct_dict)
    if is_json:
        return json.dumps(struct_dict)
    return struct_dict
class Syncer(with_metaclass(Singleton, ModelDbSyncerBase.Syncer)):
    """ModelDB syncer that adds ``*_sync`` helper methods to Keras models.

    Built on ``ModelDbSyncerBase.Syncer`` with the ``Singleton`` metaclass.
    The most recently constructed syncer is kept in ``Syncer.instance``,
    which the module-level ``*_fn`` helpers use to queue their events.
    """

    # Set by __init__; read by the module-level helper functions.
    instance = None

    def __init__(self, project_config, experiment_config, experiment_run_config,
            thrift_config=None):
        """Patch the Keras classes, publish this instance, then run base init.

        All four arguments are handed to the base-class initialiser as-is.
        """
        # These steps intentionally happen before the base initialiser runs.
        self.enable_keras_fn()
        self.local_id_to_path = {}
        Syncer.instance = self

        super(Syncer, self).__init__(
            project_config,
            experiment_config,
            experiment_run_config,
            thrift_config,
        )

    def __str__(self):
        """Return the fixed display name of this syncer."""
        return "keras_syncer"

    def enable_keras_fn(self):
        """Attach the sync helpers to ``keras.models.Model`` and ``Sequential``."""
        from keras.models import Model, Sequential

        sync_methods = (
            ("fit_sync", fit_fn),
            ('compile_sync', compile_fn),
            ('predict_sync', predict_fn),
            ('metrics_sync', metrics_fn),
        )
        for keras_cls in (Model, Sequential):
            for attr_name, helper in sync_methods:
                setattr(keras_cls, attr_name, helper)


# You may also be interested in: (keras-syncer)