Xgboost存储为pmml以及在java中调用

首先,我们构建一个xgboost模型并存储为PMML形式,使用到的包是sklearn2pmml,可以将sklearn中的模型保存为PMML的形式

import pandas as pd
from xgboost.sklearn import XGBClassifier
from sklearn2pmml import PMMLPipeline
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import sklearn2pmml


# Feature columns handed to the classifier unchanged (transformer = None),
# in the order they are registered with the DataFrameMapper.
FEATURE_COLUMNS = [
    'kx_output_riskscore',
    'kx_new_risk_0',
    'kx_new_risk_1',
    'kx_new_risk_2',
    'kx_new_risk_3',
    'kx_new_risk_4',
    'kx_new_risk_5',
    'kx_new_risk_6',
    'kx_new_risk_7',
    'kx_new_risk_8',
    'kx_new_risk_11',
    'kx_new_risk_12',
    'kx_new_risk_13',
    'kx_new_risk_14',
    'kx_new_risk_15',
    'kx_new_risk_sumList',
    'kx_new_is_riskList',
]

# Load the training sheet and drop 'mbl_no', which is not one of the mapped features.
df = pd.read_excel(
    '/Users/huoshirui/Desktop/xyworking/pythonData/dataClean/kexin_data_huoshirui.xlsx'
).drop(columns=['mbl_no'])

# Hyperparameters of the gradient-boosted binary classifier.
xgb_params = dict(
    learning_rate=0.01,
    n_estimators=1000,
    max_depth=4,
    min_child_weight=1,
    gamma=0.0001,
    subsample=0.3,
    colsample_bytree=0.8,
    colsample_bylevel=0.7,
    objective='binary:logistic',
    nthread=-1,
    scale_pos_weight=1,
    seed=666,
)
clf = XGBClassifier(**xgb_params)

# One single-column, pass-through mapping per feature.
mapper = DataFrameMapper([([column], None) for column in FEATURE_COLUMNS])

pipeline = PMMLPipeline([('mapper', mapper), ("classifier", clf)])

# Fit on every column except the label; the mapper picks out the features it needs.
train_inputs = df[df.columns.difference(["target"])]
train_labels = df["target"]
pipeline.fit(train_inputs, train_labels)

# Export the fitted pipeline as a PMML file (with_repr embeds the pipeline repr in the file).
sklearn2pmml(
    pipeline,
    "/Users/huoshirui/Desktop/test/PMML/xgboost.pmml",
    with_repr=True,
)

然后我们可以在上面代码指定的输出路径(/Users/huoshirui/Desktop/test/PMML/)下得到xgboost.pmml文件,可以使用编辑器直接打开查看,部分内容如下:



	
<Timestamp>2018-12-05T02:44:50Z</Timestamp>
<Extension>PMMLPipeline(steps=[('mapper', DataFrameMapper(default=False, df_out=False, features=[(['kx_output_riskscore'], None), (['kx_new_risk_0'], None), (['kx_new_risk_1'], None), (['kx_new_risk_2'], None), (['kx_new_risk_3'], None), (['kx_new_risk_4'], None), (['kx_new_risk_5'], None), (['kx_new_risk_6'], None), (['kx_new_risk_7'], None), (['kx_new_risk_8'], None), (['kx_new_risk_11'], None), (['kx_new_risk_12'], None), (['kx_new_risk_13'], None), (['kx_new_risk_14'], None), (['kx_new_risk_15'], None), (['kx_new_risk_sumList'], None), (['kx_new_is_riskList'], None)], input_df=False, sparse=False)), ('classifier', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=0.7, colsample_bytree=0.8, gamma=0.0001, learning_rate=0.01, max_delta_step=0, max_depth=4, min_child_weight=1, missing=None, n_estimators=1000, n_jobs=1, nthread=-1, objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=666, silent=True, subsample=0.3))])</Extension>

有了PMML模型文件,我们就可以编写Java代码来加载这个模型并进行预测了。
Java代码如下:

package com.seeyon.apps.outerspace.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.dmg.pmml.FieldName;
import org.dmg.pmml.PMML;
import org.jpmml.evaluator.Evaluator;
import org.jpmml.evaluator.FieldValue;
import org.jpmml.evaluator.InputField;
import org.jpmml.evaluator.ModelEvaluator;
import org.jpmml.evaluator.ModelEvaluatorFactory;
import org.jpmml.evaluator.TargetField;

/**
 * Loads a model exported as PMML and scores a single record with JPMML-Evaluator.
 */
public class jpmml {

    /**
     * Scores one hard-coded sample record against the exported model and prints the result.
     *
     * @param args unused
     * @throws Exception if the PMML file cannot be read or the evaluation fails
     */
    public static void main(String[] args) throws Exception {
        String pathxml = "/Users/huoshirui/Desktop/test/PMML/xgboost.pmml";

        // Keys are the input field names of the model; values are the raw feature values.
        Map<String, Double> map = new HashMap<>();
        map.put("kx_output_riskscore", 400D);
        map.put("kx_new_risk_0", 0D);
        map.put("kx_new_risk_1", 1D);
        map.put("kx_new_risk_2", 1D);
        map.put("kx_new_risk_3", 1D);
        map.put("kx_new_risk_4", 0D);
        map.put("kx_new_risk_5", 0D);
        map.put("kx_new_risk_6", 0D);
        map.put("kx_new_risk_7", 0D);
        map.put("kx_new_risk_8", 0D);
        map.put("kx_new_risk_11", 0D);
        map.put("kx_new_risk_12", 0D);
        map.put("kx_new_risk_13", 0D);
        map.put("kx_new_risk_14", 0D);
        map.put("kx_new_risk_15", 0D);
        map.put("kx_new_risk_sumList", 2D);
        map.put("kx_new_is_riskList", 1D);

        predictLrHeart(map, pathxml);
    }

    /**
     * Evaluates the PMML model stored at {@code pathxml} on one record and prints every
     * target field together with its predicted value to standard output.
     *
     * @param kxmap   raw feature values keyed by input field name; a field name that is not
     *                present yields {@code null}, which is handed to
     *                {@code InputField.prepare} as-is
     * @param pathxml file system path of the PMML model file
     * @throws Exception if the file cannot be opened or parsed, or if preparing the inputs or
     *                   evaluating the model fails; failures are propagated, not suppressed
     */
    public static void predictLrHeart(Map<?, ?> kxmap, String pathxml) throws Exception {
        // The stream is only needed while parsing; try-with-resources closes it on every path.
        PMML pmml;
        try (InputStream is = new FileInputStream(pathxml)) {
            pmml = org.jpmml.model.PMMLUtil.unmarshal(is);
        }

        Evaluator evaluator = ModelEvaluatorFactory.newInstance().newModelEvaluator(pmml);

        // Convert each raw value into the FieldValue representation the evaluator expects.
        Map<FieldName, FieldValue> arguments = new LinkedHashMap<>();
        List<InputField> inputFields = evaluator.getInputFields();
        for (InputField inputField : inputFields) {
            FieldName inputFieldName = inputField.getName();
            Object rawValue = kxmap.get(inputFieldName.getValue());
            arguments.put(inputFieldName, inputField.prepare(rawValue));
        }

        Map<FieldName, ?> results = evaluator.evaluate(arguments);

        List<TargetField> targetFields = evaluator.getTargetFields();
        for (TargetField targetField : targetFields) {
            FieldName targetFieldName = targetField.getName();
            Object targetFieldValue = results.get(targetFieldName);
            System.out.println("target: " + targetFieldName.getValue()
                    + " value: " + targetFieldValue);
        }
    }
}

编译运行后结果:
target: target value: ProbabilityDistribution{result=1, probability_entries=[1=0.69272673, 0=0.30727327]}

你可能感兴趣的:(Xgboost存储为pmml以及在java中调用)