1. 利用有监督的离散算法对数据集的属性进行离散,并保存离散后的数据集;
import java.io.File;
import weka.filters.SupervisedFilter;}
2. 利用Weka中的算法对segment-challenge.arff数据集进行标准化处理,并保存标准化后的数据集;
import java.io.File;
import weka.filters.SupervisedFilter;
import java.io.IOException;
import weka.core.Instances;
import weka.core.converters.CSVLoader;
import weka.core.converters.ConverterUtils.DataSink;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.*;
import weka.filters.unsupervised.attribute.*;
//unsupervised
import weka.filters.unsupervised.attribute.AddID;
/**
 * Normalizes the numeric attributes of an ARFF data set with Weka's
 * unsupervised {@code Normalize} filter and writes the result to a new file.
 *
 * <p>The last attribute is treated as the class attribute. A summary of the
 * data set is printed to {@code System.err} before and after filtering.
 */
public class preprocess {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT = "C:/Users/PC/Desktop/segment-challenge.arff";

    /** Output file used when no path is supplied on the command line. */
    private static final String DEFAULT_OUTPUT = "C:/Users/PC/Desktop/2.arff";

    /**
     * Loads the data set, normalizes it and saves the filtered copy.
     *
     * @param args optional: {@code args[0]} is the input file path and
     *             {@code args[1]} the output file path; the built-in default
     *             paths are used for whichever is missing
     * @throws Exception if the data cannot be read, filtered or written
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        Instances instances = DataSource.read(inputPath);
        instances.setClassIndex(instances.numAttributes() - 1);
        System.err.println(instances.toSummaryString());

        // The earlier option array ("-B 8 -M -1.0 -R 2-last") belongs to the
        // Discretize filter. Normalize only understands -S (scale) and
        // -T (translation), so those options were either ignored or rejected
        // as illegal, depending on the Weka version. Configure the filter
        // through its own setters instead; scale 1.0 / translation 0.0 are
        // the documented defaults.
        Normalize normalize = new Normalize();
        normalize.setScale(1.0);
        normalize.setTranslation(0.0);
        normalize.setInputFormat(instances);

        Instances normalized = Filter.useFilter(instances, normalize);
        System.err.println(normalized.toSummaryString());

        DataSink.write(outputPath, normalized);
    }
}
http://blog.sina.com.cn/s/blog_6f611c30010185kz.html
http://blog.163.com/shen_960124/blog/static/60730984201502884651349/