/*** 对分类结果进行比较,得出概率最大的类
*@paramclassifyResult
*@return
*/
public static String getClassifyResultName(MapclassifyResult){
String classifyName= "";if(classifyResult.isEmpty()){returnclassifyName;
}
BigDecimal result= new BigDecimal(0);
Set classifyResultSet =classifyResult.keySet();for(String classifyResultSetString : classifyResultSet){if (classifyResult.get(classifyResultSetString).compareTo(result) >= 1){
result=classifyResult.get(classifyResultSetString);
classifyName=classifyResultSetString;
}
}returnclassifyName;
}/*** 统计给定类别下的单词总数(带词频计算)
*@paramcategoryLabel 指定类别参数
*@return
*/
public staticLong categoryWordCount(String categoryLabel){
Long sum= 0L;
Map> categoryWordMap =allTrainFileSegsMap.get(categoryLabel);if (categoryWordMap == null){returnsum;
}
Set categoryWordMapKeySet =categoryWordMap.keySet();for(String categoryLabelString : categoryWordMapKeySet){
Map categoryWordMapDataMap =categoryWordMap.get(categoryLabelString);
List> dataWordMapList = new ArrayList>(categoryWordMapDataMap.entrySet());for (int i=0; i
sum+=dataWordMapList.get(i).getValue();
}
}returnsum;
}/*** 获取训练样本所有词的总数(词总数计算是带上词频的,也就是可以重复算数)
*@return
*/
public staticLong getAllTrainCategoryWordsCount(){
Long sum= 0L;//获取所有分类
Set categoryLabels =allTrainFileSegsMap.keySet();//循环相加每个类下的词总数
for(String categoryLabel : categoryLabels){
sum+=categoryWordCount(categoryLabel);
}returnsum;
}/*** 获取训练样本下各个类别不重复词的总词数,区别于getAllTrainCategoryWordsCount()方法,此处计算不计算词频
* 备注:此处并不是严格意义上的进行全量词表生成后的计算,也就是加入类别1有"中国=6"、类别2有"中国=2",总词数算中国两次,
* 也就是说,我们在计算的时候并没有生成全局词表(将所有词都作为出现一次)
*@return
*/
public staticLong getAllTrainCategoryWordCount(){
Long sum= 0L;//获取所有分类
Set categoryLabels =allTrainFileSegsMap.keySet();for(String cateGoryLabelsLabel : categoryLabels){
Map> categoryWordMap =allTrainFileSegsMap.get(cateGoryLabelsLabel);
List>> categoryWordMapList = new ArrayList>>(categoryWordMap.entrySet());for (int i=0; i
sum+=categoryWordMapList.get(i).getValue().size();
}
}returnsum;
}/*** 计算测试数据的每个单词在每个类下出现的总数
*@paramword
*@paramcategoryLabel
*@return
*/
public staticLong wordInCategoryCount(String word, String categoryLabel){
Long sum= 0L;
Map> categoryWordMap =allTrainFileSegsMap.get(categoryLabel);
Set categoryWordMapKeySet =categoryWordMap.keySet();for(String categoryWordMapKeySetFile : categoryWordMapKeySet){
Map categoryWordMapDataMap =categoryWordMap.get(categoryWordMapKeySetFile);
Integer value=categoryWordMapDataMap.get(word);if (value!=null && value>0){
sum+=value;
}
}returnsum;
}/*** 获取所有分类类别
*@return
*/
public SetgetAllCategory(){returnallTrainFileSegsMap.keySet();
}