1.训练好一个Model以后需要保存和再次预测
2.有两个模块用来保存模型 : pickle和joblib
3.Sklearn的模型导出本质上是利用Python的Pickle机制:对Python对象进行序列化,也就是把训练好的模型(Estimator/Transformer)对象序列化并存为文件。
代码流程:
1.保存Model(注:save文件夹要预先建立,否则会报错)
joblib.dump(clf, 'save/clf.pkl')
2.读取Model
clf2 = joblib.load('save/clf.pkl')
3.测试读取后的Model
print(clf2.predict(X[0:1]))
实例一,SVM分类模型
import pickle

from sklearn import datasets
from sklearn.svm import SVC

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23. Prefer the standalone `joblib` package and fall back to the
# bundled copy only on very old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Define a classifier.
svm = SVC()
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Train the model.
svm.fit(X, y)

# 1. Save in Python's native pickle format.
# The file svm.pickle is created in the current directory.
with open('svm.pickle', 'wb') as fw:
    pickle.dump(svm, fw)

# Load svm.pickle back.
# NOTE: only unpickle files you trust; unpickling can execute arbitrary code.
with open('svm.pickle', 'rb') as fr:
    new_svm1 = pickle.load(fr)
# Uncomment to check the model restored via pickle:
# print(new_svm1.predict(X[0:1]))

# 2. Save with joblib.
joblib.dump(svm, 'svm.pkl')

# Load svm.pkl back and predict the first sample with the restored model.
new_svm2 = joblib.load('svm.pkl')
print(new_svm2.predict(X[0:1]))
运行结果
————————————————————
补充:
——————————————————————
实例二,随机森林模型
# Random forest
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23. Prefer the standalone `joblib` package and fall back to the
# bundled copy only on very old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# 1. Save the model.
data = load_iris()
X, y = data["data"], data["target"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(X_train, y_train)
# Optional checks on the freshly trained model (held-out split):
# print(clf.feature_importances_)
# print(
#     classification_report(
#         y_test, clf.predict(X_test), target_names=data["target_names"]
#     )
# )
joblib.dump(clf, "classification.pkl")

# 2. Load the model.
clf2 = joblib.load('classification.pkl')
print(clf2.feature_importances_)
# Predict with the reloaded model (clf2) so this report actually exercises
# the persisted estimator rather than the in-memory original.
# The report is computed over the full dataset (X, y), as in the original.
print(
    classification_report(y, clf2.predict(X), target_names=data["target_names"])
)
运行结果1
运行结果2