实际工程开发中,算法同学用Python训练好模型后,需要提供给Java后端调用,但一般用joblib.dump保存的模型Java后端无法直接使用,
因此需要借助专门的Python模型保存库(sklearn2pmml)和对应的jar包(pmml-evaluator),将模型保存为PMML格式后再由Java读取。
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.ensemble import RandomForestClassifier
# Random-forest classifier with the hyperparameters chosen for this model.
model = RandomForestClassifier(
    n_estimators=195,
    max_depth=14,
    max_features=11,
    oob_score=True,
    random_state=123,
    n_jobs=-1,
    verbose=0,
)

# Wrap the estimator in a PMMLPipeline (single step named "random_forest")
# so that it can be handed to sklearn2pmml after fitting.
pipeline_model = PMMLPipeline([("random_forest", model)])
pipeline_model.fit(train_x, train_y)

# Write the fitted pipeline to a PMML file for the Java side to load.
sklearn2pmml(
    pipeline_model,
    "./model/random_forest.pmml",
    with_repr=True,
)
需要用到的依赖
<dependency>
    <groupId>org.jpmml</groupId>
    <artifactId>pmml-evaluator</artifactId>
    <version>1.6.4</version>
</dependency>
具体实现调用代码
import org.dmg.pmml.Field;
import org.jpmml.model.*;
import org.jpmml.evaluator.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
public class load_model {
/**
 * Builds an {@code Evaluator} from the PMML file at the given path.
 *
 * @param pmml_model_path file-system path of the PMML model file
 * @return the evaluator produced by {@code LoadingModelEvaluatorBuilder}
 * @throws Exception whatever the builder's {@code load} / {@code build} calls throw
 */
public Evaluator load_pmml (String pmml_model_path) throws Exception{
    final File pmmlFile = new File(pmml_model_path);
    return new LoadingModelEvaluatorBuilder()
            .load(pmmlFile)
            .build();
}
/**
 * Computes accuracy from confusion-matrix counts: (tp + tn) / (tp + tn + fp + fn).
 * The arithmetic is done in {@code float}; when all four counts are zero the
 * result is {@code NaN} (0f / 0f).
 *
 * @param tp number of true positives
 * @param tn number of true negatives
 * @param fp number of false positives
 * @param fn number of false negatives
 * @return the fraction of correctly classified samples
 */
public static Float accuracy(int tp, int tn, int fp, int fn){
    final float correct = (float) tp + (float) tn;
    // Summation order is kept as (tp + tn) + fp + fn.
    final float total = correct + (float) fp + (float) fn;
    return correct / total;
}
/**
 * Computes precision from confusion-matrix counts: tp / (tp + fp).
 * The arithmetic is done in {@code float}; when {@code tp} and {@code fp} are
 * both zero the result is {@code NaN} (0f / 0f).
 *
 * @param tp number of true positives
 * @param tn number of true negatives (not used by this metric)
 * @param fp number of false positives
 * @param fn number of false negatives (not used by this metric)
 * @return the fraction of predicted positives that are true positives
 */
public static Float metric_precision(int tp, int tn, int fp, int fn){
    final float truePositives = (float) tp;
    final float predictedPositives = truePositives + (float) fp;
    return truePositives / predictedPositives;
}
/**
 * Computes recall from confusion-matrix counts: tp / (tp + fn).
 * The arithmetic is done in {@code float}; when {@code tp} and {@code fn} are
 * both zero the result is {@code NaN} (0f / 0f).
 *
 * @param tp number of true positives
 * @param tn number of true negatives (not used by this metric)
 * @param fp number of false positives (not used by this metric)
 * @param fn number of false negatives
 * @return the fraction of actual positives that were predicted positive
 */
public static Float metric_recall(int tp, int tn, int fp, int fn){
    final float truePositives = (float) tp;
    final float actualPositives = truePositives + (float) fn;
    return truePositives / actualPositives;
}
/**
 * Checks that the keys of {@code input_sample} match the evaluator's input
 * fields: every input field name must be present as a key, and every key must
 * be the name of some input field. Each mismatch is reported on standard
 * output with a running counter.
 *
 * @param evaluator    evaluator whose {@code getInputFields()} defines the expected feature names
 * @param input_sample sample to validate, keyed by feature name (values are not inspected)
 * @return {@code true} when no feature is missing and no key is redundant, {@code false} otherwise
 */
public boolean check_input(Evaluator evaluator, Map<String, ?> input_sample){
    // Explicit type arguments are required: with raw List/Map the two
    // for-each loops below iterate over Object and do not compile.
    List<InputField> inputFields = evaluator.getInputFields();

    // A hash set gives constant-time lookups for the redundancy check.
    Set<String> expectedNames = new HashSet<>();

    int missing = 0;
    for (InputField inputField : inputFields){
        String fieldName = inputField.getName().toString();
        expectedNames.add(fieldName);
        if (!input_sample.containsKey(fieldName)){
            missing++;
            System.out.println( missing + " 输入数据缺少该特征:" + inputField);
        }
    }

    int redundant = 0;
    for (String key : input_sample.keySet()){
        if (!expectedNames.contains(key)){
            redundant++;
            System.out.println(redundant + " 输入特征冗余: " + key );
        }
    }

    return missing == 0 && redundant == 0;
}
public List
注意:本文使用的pmml-evaluator版本为1.6.4,如果使用低版本的jar包,模型的读取方式以及数据的读取方式都会发生改变;且如果sklearn2pmml版本较高,而jar包版本较低,Java会无法成功读取模型。
例如:sklearn2pmml保存的模型版本为4-4(保存的模型可以直接以文本形式打开查看,前几行中会有版本信息),而jar包版本为1.4.3,则会出现以下错误:PMML namespace URI http://www.dmg.org/PMML-4_4 is not supported