哈工大ltp应用java版

哈工大 LTP 的 Python 版本资料很多,这里用 Java 版本实现一下。网上很多方法都需要自己编译,我这边已经编译好了,可以直接下载使用。

model下载:
链接: https://pan.baidu.com/s/1HDaZpsrPHDcu8P15ho41VQ 提取码: dw9x
ltp编译后的文件:
https://download.csdn.net/download/qq_16613311/12489534

如果不想花积分下载,也可以参考下面这篇文章自行编译:
http://codepub.cn/2015/05/07/Compile-the-Language-Technology-Platform(C++)-and-LTP4J(Java)source-code/

下边是我写的相关类:

package com.dbapp.database.scanning.util;

import edu.hit.ir.ltp4j.NER;
import edu.hit.ir.ltp4j.Postagger;
import edu.hit.ir.ltp4j.Segmentor;
import lombok.extern.slf4j.Slf4j;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.List;

/**
 * @ClassName: ltp4jUtil
 * 哈工大ltp机器学习引入
 * @Author: yongtao.ding on 2020/5/25 16:41
 */
@Slf4j
public class Ltp4jUtil {

	//中文地名
	public static String ADDRESS_C="ns";
	//地名
	public static String ADDRESS_L="nl";
	//人名
	public static String  PERSONNAME_C="nh";
	//机构团体名
	public static String GROUPS_G="ni";
	//公检法
	public static String GOVERNMENT="j";

	/**
	 * 判断一句话中是否存在地名,先使用segmentor将内容分词,使用postagger判断是否为地名
	 * @param postagger
	 * @param segmentor
	 * @param content 内容
	 * @return
	 */
	public static Boolean isAddress(Postagger postagger, Segmentor segmentor,String content){
		List words = new ArrayList();
		segmentor.segment(content, words);
		List postags = new ArrayList();
		int size = postagger.postag(words,postags);
		boolean isAddress=false;
		for (int i = 0; i < size; i++) {
			if (postags.get(i).equals(ADDRESS_C)||postags.get(i).equals(ADDRESS_L)){
				isAddress=true;
				break;
			}
		}
		return isAddress;
	}

	/**
	 * 判断一句话中是否存在企业团体,先使用segmentor将内容分词,使用postagger判断是否为企业团体
	 * @param postagger
	 * @param segmentor
	 * @param content 内容
	 * @return
	 */
	public static Boolean isCorporateGroups(Postagger postagger, Segmentor segmentor,NER ner,String content){
		Boolean isGroups=false;
		List words = new ArrayList();
		List tags = new ArrayList();
		List ners = new ArrayList();
		segmentor.segment(content, words);
		List postags = new ArrayList();
		int size = postagger.postag(words,postags);
		for (int i = 0; i < size; i++) {
			tags.add(postags.get(i));
		}
		ner.recognize(words, tags, ners);
		for (int i = 0; i < words.size(); i++) {
			System.out.println(words.get(i)+"/"+ners.get(i));
		}
		if (ners.contains("E-Ni")){
			isGroups=true;
			return isGroups;
		}
		return isGroups;
	}

	/**
	 * 判断一句话中是否存在公检法,先使用segmentor将内容分词,使用postagger判断是否为公检法
	 * @param postagger
	 * @param segmentor
	 * @param content 内容
	 * @return
	 */
	public static Boolean isAbbreviation(Postagger postagger, Segmentor segmentor,String content){
//		List words = new ArrayList();
//		segmentor.segment(content, words);
		List list = new ArrayList<>();
		list.add(content);
		List postags = new ArrayList();
		int size = postagger.postag(list,postags);
		boolean isGovernment=false;
		for (int i = 0; i < size; i++) {
			if (postags.get(i).equals(GOVERNMENT)){
				isGovernment=true;
				break;
			}
		}
		return isGovernment;
	}

	/**
	 * 判断一句话中是否存在人名,先使用segmentor将内容分词,使用postagger判断是否为人名
	 * @param postagger
	 * @param segmentor
	 * @param content 内容
	 * @return
	 */
	public static Boolean isPersonName(Postagger postagger, Segmentor segmentor,String content){
		List words = new ArrayList();
		segmentor.segment(content, words);
		List postags = new ArrayList();
		int size = postagger.postag(words,postags);
		boolean isPersonName=false;
		for (int i = 0; i < size; i++) {
			if (postags.get(i).equals(PERSONNAME_C)){
				isPersonName=true;
				break;
			}
		}
		return isPersonName;
	}

	/**
	 * 识别地名,返回地名所占比例
	 * @param words 字段内容
	 * @return
	 */
	public static double identificationAddress( Postagger postagger,Segmentor segmentor,List words){
		int n=0;
		for (int i = 0; i < words.size(); i++) {
			String s = words.get(i);
			Boolean address = isAddress(postagger, segmentor, s);
			if (address){
				n+=1;
			}
		}
		double proportion = new BigDecimal((float)n/words.size()).setScale(2, BigDecimal.ROUND_HALF_UP).doubleValue();
		return proportion;
	}
	/**
	 * 识别人名,返回人名所占比例
	 * @param words 字段内容
	 * @return
	 */
	public static double identificationPersonName( Postagger postagger,Segmentor segmentor,List words){
		int n=0;
		for (int i = 0; i < words.size(); i++) {
			String s = words.get(i);
			Boolean address = isPersonName(postagger, segmentor, s);
			if (address){
				n+=1;
			}
		}
		double proportion = new BigDecimal((float)n/words.size()).setScale(2, BigDecimal.ROUND_HALF_UP).doubleValue();
		return proportion;
	}

	/**
	 * 识别企业团体,返回企业团体所占比例
	 * @param words 字段内容
	 * @return
	 */
	public static double identificationGroups( Postagger postagger,Segmentor segmentor,NER ner,List words){
		int n=0;
		for (int i = 0; i < words.size(); i++) {
			String s = words.get(i);
			Boolean groups = isCorporateGroups(postagger, segmentor, ner,s);
			if (groups){
				n+=1;
			}
		}
		double proportion = new BigDecimal((float)n/words.size()).setScale(2, BigDecimal.ROUND_HALF_UP).doubleValue();
		return proportion;
	}
	/**
	 * 识别公检法,返回公检法所占比例
	 * @param words 字段内容
	 * @return
	 */
	public static double identificationGovernment( Postagger postagger,Segmentor segmentor,List words){
		int n=0;
		for (int i = 0; i < words.size(); i++) {
			String s = words.get(i);
			Boolean groups = isAbbreviation(postagger, segmentor, s);
			if (groups){
				n+=1;
			}
		}
		double proportion = new BigDecimal((float)n/words.size()).setScale(2, BigDecimal.ROUND_HALF_UP).doubleValue();
		return proportion;
	}

	public static void main(String[] args) {
		/*System.load("D:\\test\\ltp\\ltp4j-0.1.0-SNAPSHOT.dll");
		Postagger postagger = new Postagger();
		Segmentor segmentor = new Segmentor();
		if (postagger.create("D:\\test\\ltp\\pos.model") < 0) {
			log.error("pos.model加载失败");
		}
		if (segmentor.create("D:\\test\\ltp\\cws.model") < 0) {
			log.error("load cws.model failed");
		}
		List words = new ArrayList();
		words.add("上海澜海实业有限公司");
		words.add("上海丰临进出口有限公司");
		words.add("上海宝钢国际经济贸易有限公司");
		words.add("上海诚齐机械制造有限公司");
		double identificationName = Ltp4jUtil.identificationPersonName(postagger,segmentor,words);
		double identificationAddress = Ltp4jUtil.identificationAddress(postagger,segmentor,words);
		double identificationGroups = Ltp4jUtil.identificationGroups(postagger,segmentor,words);
		double identificationGovernment = Ltp4jUtil.identificationGovernment(postagger,segmentor,words);
		postagger.release();
		segmentor.release();
		System.out.println("地点:"+identificationAddress);
		System.out.println("人名:"+identificationName);
		System.out.println("企业:"+identificationGroups);
		System.out.println("公检法:"+identificationGovernment);

*/
		String content="上海丰临进出口有限公司";
		LoadLibsUtil.addDirToPath("libs");
		NER ner = new NER();
		Postagger postagger = new Postagger();
		Segmentor segmentor = new Segmentor();
		if (ner.create("D:\\test\\ltp\\ner.model") < 0) {
			log.error("pos.model加载失败");
		}
		if (postagger.create("D:\\test\\ltp\\pos.model") < 0) {
			log.error("pos.model加载失败");
		}
		if (segmentor.create("D:\\test\\ltp\\cws.model") < 0) {
			log.error("load cws.model failed");
		}

		Boolean s=isCorporateGroups(postagger,segmentor,ner,content);
		System.out.println(s);


		postagger.release();
		segmentor.release();
		ner.release();

	}

}

package com.dbapp.database.scanning.util;

import java.lang.reflect.Field;

/**
 * Reflection helper that adds a directory to the path array held in the private static
 * {@code sys_paths} field of {@link ClassLoader}, so that native libraries stored in that
 * directory can be found.
 *
 * <p>This relies on a JDK-internal field. On runtimes where the field does not exist or cannot
 * be made accessible, the call logs a warning and changes nothing.
 *
 * @author yongtao.ding on 2020/5/26 9:09
 */
public class LoadLibsUtil {

	private static final java.util.logging.Logger LOGGER =
			java.util.logging.Logger.getLogger(LoadLibsUtil.class.getName());

	/**
	 * Appends {@code s} to the {@code sys_paths} array of {@link ClassLoader}. The call is
	 * best-effort: it never throws, and a directory that is already listed is not added again.
	 *
	 * @param s directory to add; {@code null} is ignored
	 */
	public static void addDirToPath(String s) {
		if (s == null) {
			// A null entry in the path array would only surface later as an obscure failure.
			LOGGER.warning("Ignoring null native library directory");
			return;
		}
		try {
			// Look up the path field and make it accessible despite being private.
			Field field = ClassLoader.class.getDeclaredField("sys_paths");
			field.setAccessible(true);
			String[] path = (String[]) field.get(null);
			if (path == null) {
				// NOTE(review): presumably the JVM has not built the array yet; installing our own
				// array here could interfere with however it is initialized, so leave it alone.
				LOGGER.warning("sys_paths is not initialized; directory not added: " + s);
				return;
			}
			for (String existing : path) {
				if (s.equals(existing)) {
					return;
				}
			}
			// Copy the current entries into an array one slot longer and put the new directory last.
			String[] tem = new String[path.length + 1];
			System.arraycopy(path, 0, tem, 0, path.length);
			tem[path.length] = s;
			field.set(null, tem);
		} catch (ReflectiveOperationException | RuntimeException e) {
			// Field missing or not accessible on this JDK: report it with the cause and carry on.
			LOGGER.log(java.util.logging.Level.WARNING,
					"Could not add directory to native library path: " + s, e);
		}
	}
}

关于加载dll:
代码中通过下面这行调用,把存放 dll 的 libs 目录加入到本地库的搜索路径中:

LoadLibsUtil.addDirToPath("libs");

哈工大ltp应用java版_第1张图片
有问题评论,能不能看到随缘~~~~

你可能感兴趣的:(问题)