Weka 的 Java 使用(2)——分类
书接上文,既然已经写了聚类,这里再把我用到的分类相关代码奉上。
1
/** */
/**
2 *
3 */
4 package edu.tju.ikse.mi.util;
5
6 import java.io.File;
7 import java.io.IOException;
8
9 import weka.classifiers.Classifier;
10 import weka.core.Attribute;
11 import weka.core.Instances;
12 import weka.core.converters.ArffLoader;
13
14 /** */ /**
15 * @author Jia Yu
16 * @date 2010-6-9
17 */
18 public class WekaClassifier {
19
20 /** *//**
21 * @param args
22 */
23 private ArffLoader loader;
24 private Instances dataSet;
25 private File arffFile;
26 private int sizeOfDataset;
27 private Classifier classifier;
28 private int sizeOfAttribute;
29 private String resultClass;
30 private double[] distributions;
31
32 public WekaClassifier(File file) throws Exception {
33 this.arffFile = file;
34 loadTrainSet();
35 loadClassifier();
36 buildClassifierModel();
37 }
38
39 private void buildClassifierModel() throws Exception {
40 this.classifier.buildClassifier(dataSet);
41 }
42
43 private void loadClassifier() {
44 this.classifier = new weka.classifiers.meta.RandomSubSpace();
45 }
46
47 private void loadTrainSet() throws IOException {
48 loader = new ArffLoader();
49 loader.setFile(this.arffFile);
50 dataSet = loader.getDataSet();
51 setSizeOfDataset(dataSet.numInstances());
52 setSizeOfAttribute(dataSet.numAttributes());
53 dataSet.setClassIndex(this.sizeOfAttribute - 1);
54 }
55
56 public void classifyInstance(weka.core.Instance instance) throws Exception {
57 double tNum = this.classifier.classifyInstance(instance);
58 setDistributions(this.classifier.distributionForInstance(instance));
59 Attribute attr = dataSet.attribute(dataSet.classIndex());
60 int classIndex = (int) tNum;
61 setResultClass(attr.value(classIndex));
62 }
63
64 public static void main(String[] args) {
65 File file = new File(
66 "iris.arff");
67 try {
68 WekaClassifier wc = new WekaClassifier(file);
69 double[] feature = { 5.1,3.5,1.4,0.2 };
70 weka.core.Instance ins = new weka.core.Instance(wc
71 .getSizeOfAttribute());
72 ins.setDataset(wc.getDataSet());
73 for (int i = 0; i < ins.numAttributes() - 1; i++) {
74 ins.setValue(i, feature[i]);
75 // System.out.println(ins.attribute(i).getLowerNumericBound());
76 }
77 ins.setValue(ins.numAttributes() - 1, "Iris-setosa");
78 System.out.println("original class is "
79 + ins.stringValue(ins.numAttributes() - 1));
80 wc.classifyInstance(ins);
81 System.out.println("classify it to class "
82 + wc.getResultClass());
83 } catch (Exception e) {
84 e.printStackTrace();
85 }
86 }
87
88 public int getSizeOfAttribute() {
89 return sizeOfAttribute;
90 }
91
92 public void setSizeOfAttribute(int sizeOfAttribute) {
93 this.sizeOfAttribute = sizeOfAttribute;
94 }
95
96 public Instances getDataSet() {
97 return dataSet;
98 }
99
100 public void setDataSet(Instances dataSet) {
101 this.dataSet = dataSet;
102 }
103
104 public String getResultClass() {
105 return resultClass;
106 }
107
108 public void setResultClass(String resultClass) {
109 this.resultClass = resultClass;
110 }
111
112 public void setDistributions(double[] distributions) {
113 this.distributions = distributions;
114 }
115
116 public double[] getDistributions() {
117 return distributions;
118 }
119
120 public void setSizeOfDataset(int sizeOfDataset) {
121 this.sizeOfDataset = sizeOfDataset;
122 }
123
124 public int getSizeOfDataset() {
125 return sizeOfDataset;
126 }
127}
128
2 *
3 */
4 package edu.tju.ikse.mi.util;
5
6 import java.io.File;
7 import java.io.IOException;
8
9 import weka.classifiers.Classifier;
10 import weka.core.Attribute;
11 import weka.core.Instances;
12 import weka.core.converters.ArffLoader;
13
14 /** */ /**
15 * @author Jia Yu
16 * @date 2010-6-9
17 */
18 public class WekaClassifier {
19
20 /** *//**
21 * @param args
22 */
23 private ArffLoader loader;
24 private Instances dataSet;
25 private File arffFile;
26 private int sizeOfDataset;
27 private Classifier classifier;
28 private int sizeOfAttribute;
29 private String resultClass;
30 private double[] distributions;
31
32 public WekaClassifier(File file) throws Exception {
33 this.arffFile = file;
34 loadTrainSet();
35 loadClassifier();
36 buildClassifierModel();
37 }
38
39 private void buildClassifierModel() throws Exception {
40 this.classifier.buildClassifier(dataSet);
41 }
42
43 private void loadClassifier() {
44 this.classifier = new weka.classifiers.meta.RandomSubSpace();
45 }
46
47 private void loadTrainSet() throws IOException {
48 loader = new ArffLoader();
49 loader.setFile(this.arffFile);
50 dataSet = loader.getDataSet();
51 setSizeOfDataset(dataSet.numInstances());
52 setSizeOfAttribute(dataSet.numAttributes());
53 dataSet.setClassIndex(this.sizeOfAttribute - 1);
54 }
55
56 public void classifyInstance(weka.core.Instance instance) throws Exception {
57 double tNum = this.classifier.classifyInstance(instance);
58 setDistributions(this.classifier.distributionForInstance(instance));
59 Attribute attr = dataSet.attribute(dataSet.classIndex());
60 int classIndex = (int) tNum;
61 setResultClass(attr.value(classIndex));
62 }
63
64 public static void main(String[] args) {
65 File file = new File(
66 "iris.arff");
67 try {
68 WekaClassifier wc = new WekaClassifier(file);
69 double[] feature = { 5.1,3.5,1.4,0.2 };
70 weka.core.Instance ins = new weka.core.Instance(wc
71 .getSizeOfAttribute());
72 ins.setDataset(wc.getDataSet());
73 for (int i = 0; i < ins.numAttributes() - 1; i++) {
74 ins.setValue(i, feature[i]);
75 // System.out.println(ins.attribute(i).getLowerNumericBound());
76 }
77 ins.setValue(ins.numAttributes() - 1, "Iris-setosa");
78 System.out.println("original class is "
79 + ins.stringValue(ins.numAttributes() - 1));
80 wc.classifyInstance(ins);
81 System.out.println("classify it to class "
82 + wc.getResultClass());
83 } catch (Exception e) {
84 e.printStackTrace();
85 }
86 }
87
88 public int getSizeOfAttribute() {
89 return sizeOfAttribute;
90 }
91
92 public void setSizeOfAttribute(int sizeOfAttribute) {
93 this.sizeOfAttribute = sizeOfAttribute;
94 }
95
96 public Instances getDataSet() {
97 return dataSet;
98 }
99
100 public void setDataSet(Instances dataSet) {
101 this.dataSet = dataSet;
102 }
103
104 public String getResultClass() {
105 return resultClass;
106 }
107
108 public void setResultClass(String resultClass) {
109 this.resultClass = resultClass;
110 }
111
112 public void setDistributions(double[] distributions) {
113 this.distributions = distributions;
114 }
115
116 public double[] getDistributions() {
117 return distributions;
118 }
119
120 public void setSizeOfDataset(int sizeOfDataset) {
121 this.sizeOfDataset = sizeOfDataset;
122 }
123
124 public int getSizeOfDataset() {
125 return sizeOfDataset;
126 }
127}
128
其中的 Iris 数据集也是数据挖掘领域的标准数据集,这个程序的运行结果如下:
original class is Iris-setosa
classify it to class Iris-setosa
感兴趣的朋友可以和我一起研究~~~邮件联系哦。