首先把 ictclas4j 解压缩，然后把 Data 文件夹整个拷贝到 Eclipse 项目的文件夹下，把 bin 目录下的 org 文件夹整个拷贝到 Eclipse 项目的 bin 目录下，再把 src 目录下的 org 文件夹整个拷贝到 Eclipse 项目的 src 目录下。
java weka.filters.supervised.attribute.Discretize -i my_arff_save_path/data_vsm.arff -o my_arff_save_path/data_D_vsm.arff -c first
运行上述命令后，得到离散化后的文件 data_D_vsm.arff。
测试
import
java.io.File;
import
weka.classifiers.Classifier;
import
weka.classifiers.trees.J48;
import
weka.core.Instances;
import
weka.core.converters.ArffLoader;
import
java.io.BufferedReader;
import
java.io.BufferedWriter;
import
java.io.File;
import
java.io.FileInputStream;
import
java.io.FileNotFoundException;
import
java.io.FileOutputStream;
import
java.io.InputStreamReader;
import
java.io.OutputStreamWriter;
import
java.util.*;
public
class
weka {
public
static
void
main(String[]
args
)
throws
Exception {
BufferedWriter
destFileBw
=
new
BufferedWriter(
new
OutputStreamWriter(
new
FileOutputStream(
new
File(
"E://data.txt"
))));
//
TODO
Auto-generated method stub
Classifier
m_classifier
=
new
J48();
File
inputFile
=
new
File(
"E://data_D_vsm.arff"
);
//训练语料文件
ArffLoader
atf
=
new
ArffLoader();
atf
.setFile(
inputFile
);
Instances
instancesTrain
=
atf
.getDataSet();
// 读入训练文件
inputFile
=
new
File(
"E://data_D_vsm.arff"
);
//测试语料文件
atf
.setFile(
inputFile
);
Instances
instancesTest
=
atf
.getDataSet();
// 读入测试文件
instancesTest
.setClassIndex(0);
//设置分类属性所在行号(第一行为0号),instancesTest.numAttributes()可以取得属性总数
double
sum
=
instancesTest
.numInstances(),
//测试语料实例数
right
= 0.0f;
instancesTrain
.setClassIndex(0);
m_classifier
.buildClassifier(
instancesTrain
);
//训练
for
(
int
i
= 0;
i
<
sum
;
i
++)
//测试分类结果
{
double
predicted
=
m_classifier
.classifyInstance(
instancesTest
.instance(
i
));
System.
out
.println(
"预测某条记录的分类id:"
+
predicted
+
", 分类值:"
+
instancesTest
.classAttribute().value((
int
)
predicted
));
destFileBw
.write(
"预测某条记录的分类id:"
+
predicted
+
", 分类值:"
+
instancesTest
.classAttribute().value((
int
)
predicted
));
destFileBw
.newLine();
System.
out
.println(
"测试文件的分类值: "
+
instancesTest
.instance(
i
).classValue() +
", 记录:"
+
instancesTest
.instance(
i
));
destFileBw
.write(
"测试文件的分类值: "
+
instancesTest
.instance(
i
).classValue() +
", 记录:"
+
instancesTest
.instance(
i
));
destFileBw
.newLine();
System.
out
.println(
"--------------------------------------------------------------"
);
destFileBw
.write(
"--------------------------------------------------------------"
);
destFileBw
.newLine();
if
(
m_classifier
.classifyInstance(
instancesTest
.instance(
i
))==
instancesTest
.instance(
i
).classValue())
//如果预测值和答案值相等(测试语料中的分类列提供的须为正确答案,结果才有意义)
{
right
++;
//正确值加1
}
}
System.
out
.println(
"J48 classification precision:"
+(
right
/
sum
));
destFileBw
.write(
"J48 classification precision:"
+(
right
/
sum
));
destFileBw
.newLine();
destFileBw
.close();
}
}