某公司的试题,答完后保存一下。
import java.net.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.io.*;
/*
* UTF-8
* @author [email protected]
*/
/**
 * Minimal single-threaded TCP server that accepts one "password check" line per
 * connection and refuses clients that reconnect from the same IP address within
 * ten seconds of their previous request.
 *
 * The actual password verification is intentionally left out; the server only
 * echoes what it received.
 */
public class Server {
    /** Minimum gap, in milliseconds, required between two requests from one IP. */
    private static final long MIN_INTERVAL_MILLIS = 10000L;

    /** Listening socket; opened by the constructor and closed when the accept loop ends. */
    private ServerSocket ss;

    /**
     * Time of the most recent request (epoch milliseconds) keyed by client IP address.
     * Kept in memory only; a real service would persist this in a database.
     */
    public static ConcurrentMap<String, Long> cMap = new ConcurrentHashMap<String, Long>();

    /**
     * Opens the listening socket and serves clients one at a time. This call
     * only returns when the port cannot be bound or accepting connections fails.
     *
     * @param port TCP port to listen on
     */
    public Server(int port) {
        try {
            ss = new ServerSocket(port);
        } catch (IOException e) {
            // There is no client to answer yet, so report on the console.
            System.out.println("wrong: cannot listen on port " + port + ": " + e.getMessage());
            return;
        }
        try {
            while (true) {
                Socket client = ss.accept();
                try {
                    handle(client);
                } catch (IOException e) {
                    // A misbehaving client must not take the whole server down.
                    System.out.println("wrong: " + e.getMessage());
                } finally {
                    try {
                        client.close();
                    } catch (IOException ignored) {
                        // Nothing useful can be done if closing fails.
                    }
                }
            }
        } catch (IOException e) {
            System.out.println("wrong: accept failed: " + e.getMessage());
        } finally {
            try {
                ss.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }

    /**
     * Serves a single connection: either processes the password request or
     * rejects it when the same IP was seen less than ten seconds ago.
     *
     * @param client the accepted connection (closed by the caller)
     * @throws IOException if reading from or writing to the client fails
     */
    private void handle(Socket client) throws IOException {
        String remoteIp = client.getInetAddress().getHostAddress();
        Long lastSeen = cMap.get(remoteIp);
        PrintWriter writer = null;
        try {
            writer = new PrintWriter(client.getOutputStream(), true);
            if (lastSeen == null) {
                // No timestamp recorded: this is the first request from that IP.
                System.out.print("第一次登录,");
                verify(client, writer, remoteIp);
            } else if (System.currentTimeMillis() - lastSeen.longValue() <= MIN_INTERVAL_MILLIS) {
                System.out.println("warning:间隔时间小于10s,服务不允许" + remoteIp + "继续密码校验请求");
                writer.println("warning:两次登录间隔时间小于10s,服务不允许您的IP:" + remoteIp + "继续密码校验请求");
            } else {
                System.out.print("间隔时间大于10s,");
                verify(client, writer, remoteIp);
            }
        } finally {
            // Record every attempt, including rejected or failed ones, so the
            // ten-second window always counts from the latest request.
            cMap.put(remoteIp, Long.valueOf(System.currentTimeMillis()));
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Reads the one-line password request and acknowledges it to the client.
     * NOTE(review): the received password is echoed to the console and back to
     * the client, as in the original exercise; do not do this in production.
     */
    private void verify(Socket client, PrintWriter writer, String remoteIp) throws IOException {
        System.out.print("服务允许" + remoteIp + "继续密码校验请求 ,");
        BufferedReader reader = new BufferedReader(new InputStreamReader(client.getInputStream()));
        String password = reader.readLine();
        System.out.println("您的密码校验请求为:" + password);
        // The real password verification would happen here.
        writer.println("Server收到你的信息:" + password + ",并进行密码校验");
    }

    /**
     * Checks that the text is a decimal TCP port number in the range 0..65535.
     *
     * @param port command-line argument to validate
     * @return true when the argument is a usable port number
     */
    private static boolean testPort(String port) {
        try {
            int value = Integer.parseInt(port);
            return value >= 0 && value <= 65535;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Entry point. The first argument is the port number.
     *
     * Usage:
     *   javac Server.java
     *   java Server 10000
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.out.println("请输入端口号,比如执行 java Server 10000");
        } else if (!testPort(args[0])) {
            System.out.println("第一个参数请输入正确的端口号");
        } else {
            System.out.println("Server starting .. ");
            new Server(Integer.parseInt(args[0]));
        }
    }
}
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.Socket;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/*
* UTF-8
* @author [email protected]
*/
/**
 * Command-line client for the password-check server: connects, sends one line
 * and prints the single line the server answers with.
 */
public class Client {
    /** Syntactic shape of a dotted-quad IPv4 address; octet ranges are checked separately. */
    private static final Pattern IPV4_SHAPE =
            Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}");

    Socket socket;
    BufferedReader in;
    PrintWriter out;

    /**
     * Connects to the server, sends the request and prints the reply. Any I/O
     * failure is reported on the console instead of being thrown.
     *
     * @param ipaddress address of the server
     * @param port      TCP port of the server
     * @param password  request text to send (one line)
     */
    public Client(String ipaddress, int port, String password) {
        try {
            System.out.println("尝试连接到服务 " + ipaddress + ":" + port);
            socket = new Socket(ipaddress, port);
            System.out.println("客户端发送请求信息:" + password);
            out = new PrintWriter(socket.getOutputStream(), true);
            out.println(password);
            in = new BufferedReader(new InputStreamReader(socket.getInputStream()));
            System.out.println(in.readLine());
        } catch (IOException e) {
            // The connection may never have been opened, so only the console can be used here.
            System.out.println("O,my god,服务端没有连接上,请确认服务已经开启,且您输入的ip和端口号正确");
        } finally {
            closeAll();
        }
    }

    /** Releases the writer, reader and socket, tolerating any that were never opened. */
    private void closeAll() {
        if (out != null) {
            out.close();
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
        if (socket != null) {
            try {
                socket.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }

    /**
     * Checks that the text is a dotted-quad IPv4 address with every octet in 0..255.
     *
     * @param ip command-line argument to validate
     * @return true when the argument is a well-formed IPv4 address
     */
    private static boolean testIP(String ip) {
        if (ip == null || !IPV4_SHAPE.matcher(ip).matches()) {
            return false;
        }
        String[] octets = ip.split("\\.");
        for (int i = 0; i < octets.length; i++) {
            if (Integer.parseInt(octets[i]) > 255) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks that the text is a decimal TCP port number in the range 1..65535.
     *
     * @param port command-line argument to validate
     * @return true when the argument is a port a client can connect to
     */
    private static boolean testPort(String port) {
        try {
            int value = Integer.parseInt(port);
            return value >= 1 && value <= 65535;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Entry point. Arguments: 1. IPv4 address, 2. port number, 3. message.
     *
     * Usage:
     *   javac Client.java
     *   java Client 192.168.1.116 10000 messages
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.out.println("请输入参数:服务ip,服务端口,消息,比如执行 java Client 192.168.1.116 10000 messages");
        } else if (!testIP(args[0])) {
            System.out.println("第一个参数请输入正确的ipv4地址,比如192.168.1.116");
        } else if (!testPort(args[1])) {
            System.out.println("第二个参数请输入正确的端口号,比如10000");
        } else {
            new Client(args[0], Integer.parseInt(args[1]), args[2]);
        }
    }
}
-----------------------------------------------------------------
以下 ab 程序编译和运行时需要 commons-logging-1.1.1.jar、httpcore-4.1.jar、httpclient-4.1.jar
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
/*
*GBK
[email protected]
*/
/**
 * A small ApacheBench look-alike: fires a number of HTTP GET requests at one
 * URL with a configurable concurrency level and prints summary statistics.
 *
 * Options: -n total requests, -c concurrent requests, -v verbosity.
 * Shared statistics live in public static fields that the worker threads update.
 */
public class ab {
    private static final String welcome="This is haoning's ApacheBench, Version 0.1 <$Revision: 000001 $>\n"
            +"Copyright 2011 haoning, [email protected] \n"
            +"Licensed to haoning, http://www.sohu.com/\n";// banner
    private static final String help=" Usage: java ab [options] [http[s]://]hostname[:port]/path"
            +"\n-n requests Number of requests to perform"
            +"\n-c concurrency Number of multiple requests to make"
            +"\n-v verbosity How much troubleshooting info to print";
    private static final String error="ab: wrong number of arguments \n";

    /** One or more decimal digits; the empty string is deliberately not a number. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** Guards the read-modify-write updates the worker threads make to the counters. */
    private static final Object STATS_LOCK = new Object();

    public static volatile int count=1;// successful requests; starts at -n and drops by one per failure
    public static volatile int errorcount=0;// failed requests
    public static volatile int concurrency=1;// number of concurrent threads, -c
    public static volatile int verbosity=1;// verbosity level, -v
    public static volatile String requestHeaderString=null;// request header text (for -v)
    public static volatile String responseHeaderString=null;// response header text (for -v)
    public static volatile HttpGet publichttpget=null;// first request, kept to report host/port/path
    public static volatile HttpResponse publicresponse =null;// first response, kept to report headers
    public static volatile int documentLength=0;
    public static volatile int responseheaderslength=0;// length of the response headers
    public static volatile int requestheaderslength=0;// length of the request headers
    public static volatile int responsecontentlength=0;// length of the response body

    /**
     * Tells whether the text consists only of decimal digits.
     *
     * @param str text to check; null and the empty string are not numeric
     * @return true when the text is a non-empty run of digits
     */
    public static boolean isNumeric(String str){
        return str != null && DIGITS.matcher(str).matches();
    }

    /**
     * Looks up the value that follows an option key on the command line.
     *
     * @param args command-line arguments
     * @param key  option name, e.g. "-n"
     * @return the value after the last occurrence of the key, or "1" when the
     *         key is absent, has no value, or the value is not a valid int
     */
    public static String getValue(String[] args,String key){
        String value = "1";
        // Stop one short of the end: a key in last position has no value to read.
        for (int i = 0; i + 1 < args.length; i++) {
            if (args[i].equals(key)) {
                value = args[i + 1];
            }
        }
        if (!isNumeric(value)) {
            return "1";
        }
        try {
            Integer.parseInt(value);
        } catch (NumberFormatException tooLarge) {
            // All digits but outside the int range; callers parse this as an int.
            return "1";
        }
        return value;
    }

    /**
     * Validates the command line, which must look like
     * {@code [-key value]... url}.
     *
     * @param args command-line arguments
     * @return 1 for a lone URL, 0 for valid options plus URL, or a negative
     *         code (-1 no arguments, -2 even argument count, -3 unknown option)
     */
    public static int validateArgs(String[] args){
        List<String> keys = new ArrayList<String>();
        keys.add("-v");// verbosity
        keys.add("-c");// concurrency
        keys.add("-n");// number of requests
        if (args.length < 1) {
            System.out.println(error + help);
            return -1;
        }
        if (args.length == 1) {
            return 1;
        }
        // Key/value pairs plus the URL always give an odd count.
        if (args.length % 2 == 0) {
            System.out.println(error + help);
            return -2;
        }
        for (int i = 0; i < args.length - 1; i += 2) {
            if (!keys.contains(args[i])) {
                System.out.println(error + help);
                return -3;
            }
        }
        return 0;
    }

    /**
     * Builds the request header dump, records its length in
     * {@link #requestheaderslength} and prints it when verbosity is above 2.
     *
     * @param httpget the request that was sent
     */
    public static void getRequestHeaderString(HttpGet httpget){
        StringBuffer requestLine = new StringBuffer();
        requestLine.append(HttpGet.METHOD_NAME+" "+httpget.getURI().getRawPath()+" "+httpget.getProtocolVersion()+"\n");
        requestLine.append("Host: " + httpget.getURI().getAuthority());
        // Only the request line and Host header are counted, as before.
        requestheaderslength = requestLine.toString().length();
        if (verbosity > 2) {
            StringBuffer dump = new StringBuffer();
            dump.append("INFO: POST header ==\n");
            dump.append("---\n");
            dump.append(requestLine.toString()+"\n");
            dump.append("User-Agent: haoning's ApacheBench/0.1\n");
            dump.append("Accept: */* \n\n");
            System.out.println(dump.toString());
        }
    }

    /**
     * Builds the response dump, records header and body lengths in
     * {@link #responseheaderslength} and {@link #responsecontentlength}, and
     * prints the dump when verbosity is above 2.
     *
     * @param response        the received response
     * @param responseContent the response body as text
     */
    public static void getResponseHeaderString(HttpResponse response,String responseContent) throws IllegalStateException, IOException{
        StringBuffer dump = new StringBuffer();
        dump.append("---\n");
        dump.append("LOG: header received:\n");
        dump.append(response.getStatusLine()+"\n");
        Header[] headers = response.getAllHeaders();
        int headerLength = 0;
        for (int i = 0; i < headers.length; i++) {
            String line = headers[i].toString();
            dump.append(line+"\n");
            headerLength += line.length();
        }
        responseheaderslength = headerLength;
        dump.append("Connection: close\n\n");
        dump.append(responseContent+"\n");
        responsecontentlength = responseContent.length();
        dump.append("LOG: Response Code="+response.getStatusLine().getStatusCode()+"\n");
        dump.append("..done \n\n");
        if (verbosity > 2) {
            System.out.println(dump.toString());
        }
    }

    /**
     * Runs one batch of simultaneous GET requests and waits for all of them.
     *
     * @param thisconcurrency number of threads to start; nothing happens when below 1
     * @param url             address to request
     */
    public static void MultiThreadExecution(int thisconcurrency,String url) {
        if (thisconcurrency <= 0) {
            return;
        }
        ThreadSafeClientConnManager cm = new ThreadSafeClientConnManager();
        cm.setMaxTotal(Math.max(100, thisconcurrency));
        // Every request targets the same host, so the per-route limit is what
        // actually bounds concurrency; raise it to the requested level.
        cm.setDefaultMaxPerRoute(thisconcurrency);
        HttpClient httpclient = new DefaultHttpClient(cm);
        try {
            AbThread[] threads = new AbThread[thisconcurrency];
            for (int i = 0; i < threads.length; i++) {
                HttpGet httpget = new HttpGet(url);
                if (publichttpget == null) {
                    publichttpget = httpget;// kept for the summary report
                }
                threads[i] = new AbThread(httpclient, httpget, i + 1);
            }
            for (int j = 0; j < threads.length; j++) {
                threads[j].start();
            }
            for (int j = 0; j < threads.length; j++) {
                threads[j].join();
            }
        } catch (InterruptedException interrupted) {
            // Preserve the interrupt for whoever is running the benchmark.
            Thread.currentThread().interrupt();
        } catch (Exception ex) {
            System.out.println("MultiThreadExecution---------------exception");
            ex.printStackTrace();
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

    /** Counts one failed request; synchronized because ++/-- on a volatile is not atomic. */
    private static void recordFailure() {
        synchronized (STATS_LOCK) {
            count--;
            errorcount++;
        }
    }

    /** Keeps the first response seen so the summary can report its headers. */
    private static void rememberFirstResponse(HttpResponse response) {
        synchronized (STATS_LOCK) {
            if (publicresponse == null) {
                publicresponse = response;
            }
        }
    }

    /**
     * Returns the body length to report: the declared Content-Length when the
     * response has one, otherwise the length measured while reading the body.
     */
    private static long documentLengthOf(HttpResponse response) {
        HttpEntity entity = response.getEntity();
        long declared = entity == null ? -1L : entity.getContentLength();
        return declared >= 0 ? declared : responsecontentlength;
    }

    /**
     * Entry point: parses options, runs the benchmark in batches of
     * {@code concurrency} requests and prints the summary.
     * Defaults when an option is missing: -n 1, -c 1, -v 1.
     */
    public static void main(String[] args) {
        System.out.println(welcome);
        if (validateArgs(args) < 0) {
            return;
        }
        verbosity = Integer.parseInt(getValue(args, "-v"));
        count = Integer.parseInt(getValue(args, "-n"));
        concurrency = Integer.parseInt(getValue(args, "-c"));
        if (concurrency < 1) {
            // Would otherwise divide by zero below.
            System.out.println("ab: Invalid Concurrency, must be at least 1");
            return;
        }
        if (count < concurrency) {
            System.out.println("ab: Cannot use concurrency level greater than total number of requests");
            return;
        }
        String url = args[args.length - 1];
        System.out.println("Benchmarking "+url+" (be patient)....done\n\n");
        int fullBatches = count / concurrency;// batches that use the full concurrency
        int lastBatch = count % concurrency;// size of the final partial batch
        long startedAt = System.currentTimeMillis();
        for (int i = 0; i < fullBatches; i++) {
            MultiThreadExecution(concurrency, url);
        }
        MultiThreadExecution(lastBatch, url);
        long longtime = System.currentTimeMillis() - startedAt;// elapsed milliseconds

        HttpResponse firstResponse = publicresponse;
        HttpGet firstRequest = publichttpget;
        if (firstResponse == null || firstRequest == null) {
            return;// nothing succeeded, so there is nothing to report
        }
        String serverSoftware = "unknow server";
        Header[] headers = firstResponse.getAllHeaders();
        for (int i = 0; i < headers.length; i++) {
            if (headers[i].getName().trim().equals("Server")) {
                serverSoftware = headers[i].getValue();
            }
        }
        int port = firstRequest.getURI().getPort();
        if (port == -1) {
            // No explicit port in the URL: fall back to the scheme default.
            port = "https".equalsIgnoreCase(firstRequest.getURI().getScheme()) ? 443 : 80;
        }
        int completed = count;
        int failed = errorcount;
        long total = (long) completed + failed;
        long documentBytes = documentLengthOf(firstResponse);
        double seconds = longtime / 1000.00;

        System.out.println("Server Software: " + serverSoftware);
        System.out.println("Server Hostname: " + firstRequest.getURI().getHost());
        System.out.println("Server Port: " + port);
        System.out.println();
        System.out.println("Document Path: " + firstRequest.getURI().getPath());
        System.out.println("Document Length: " + documentBytes+" bytes");
        System.out.println();
        System.out.println();
        System.out.println("Concurrency Level: "+concurrency);
        System.out.println("Time taken for tests: "+seconds+" seconds");
        System.out.println("Complete requests: "+completed);
        System.out.println("Failed requests: "+failed);
        System.out.println("Write errors: "+failed);
        // long arithmetic: per-request bytes times request count can exceed int.
        System.out.println("Total transferred: "+((long) responsecontentlength+responseheaderslength+requestheaderslength)*completed+" bytes");
        System.out.println("HTML transferred: "+documentBytes*completed+" bytes");
        System.out.println("Requests per second: "+total/seconds+" [#/sec] (mean)");
        // Same definitions as ApacheBench, in milliseconds as the labels state.
        System.out.println("Time per request: "+(double) concurrency*longtime/total+" [ms] (mean)");
        System.out.println("Time per request: "+(double) longtime/total+" [ms] (mean, across all concurrent requests)");
        System.out.println();
    }

    /**
     * Worker thread that performs a single GET request and feeds the shared statistics.
     */
    static class AbThread extends Thread {
        private final HttpClient httpClient;
        private final HttpContext context;
        private final HttpGet httpget;
        private final int id;

        public AbThread(HttpClient httpClient, HttpGet httpget, int id) {
            this.httpClient = httpClient;
            this.context = new BasicHttpContext();
            this.httpget = httpget;
            this.id = id;
        }

        @Override
        public void run() {
            try {
                HttpResponse response = httpClient.execute(httpget, context);
                rememberFirstResponse(response);
                HttpEntity entity = response.getEntity();
                if (entity != null) {
                    // The body must be consumed so the connection can be released.
                    byte[] bytes = EntityUtils.toByteArray(entity);
                    getRequestHeaderString(httpget);
                    getResponseHeaderString(response, new String(bytes));
                }
            } catch (Exception e) {
                recordFailure();
                httpget.abort();
                System.out.println("ab: invalid URL");
                System.out.println(help);
            }
        }
    }
}
----------------------------------------------------------
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/*
* GBK
* [email protected]
*/
/**
 * Naive text classifier based on word frequencies.
 *
 * Training ("traintxt") scans a directory whose sub-folders are the categories,
 * keeps each category's most frequent words minus the words that are frequent
 * overall, and saves the result to {@link #trainTempFile}. Classification
 * ("sorttxt") assigns every file of a folder to the category whose word list
 * occurs most often in it and reports the share of correct assignments.
 */
public class SortTxt {
    public static final int sampleCount=1000;// how many top words to keep; larger is more accurate
    public static final String trainTempFile="c:\\trainSet.txt";// where the training set is stored

    /** A word is a run of ASCII letters and digits. */
    private static final Pattern WORD = Pattern.compile("[0-9a-zA-Z]+");

    /**
     * Entry point.
     *
     * {@code java SortTxt traintxt D:\sohu3\articles} builds the training set;
     * {@code java SortTxt sorttxt D:\sohu3\articles\comp.lang.javascript}
     * classifies the files of that folder and prints the accuracy.
     *
     * @param args mode ("traintxt" or "sorttxt") followed by a folder path
     */
    public static void main(String[] args) {
        if (args.length >= 2 && args[0].equals("traintxt")) {
            saveTrainSet(args[1]);
        } else if (args.length >= 2 && args[0].equals("sorttxt")) {
            sortFilesByTrainSet(args[1]);
        } else {
            System.out.println("please use 'java SortTxt traintxt <articlesFolder>' for example :'java SortTxt traintxt D:\\sohu3\\articles'");
            System.out.println("or 'java SortTxt sorttxt <sortFolder>' for example:'java SortTxt sorttxt D:\\sohu3\\articles\\comp.lang.javascript'");
        }
    }

    /** Closes a stream, ignoring failures because nothing useful can be done about them. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource != null) {
            try {
                resource.close();
            } catch (java.io.IOException ignored) {
                // Best effort only.
            }
        }
    }

    /**
     * Returns the keys of the most frequent entries, at most {@link #sampleCount}
     * of them, most frequent first. The result is an independent list.
     */
    private static List<String> topKeys(Map<String, Integer> counts) {
        List<String> ordered = convertList(sortMap(counts));
        int limit = Math.min(sampleCount, ordered.size());
        return new ArrayList<String>(ordered.subList(0, limit));
    }

    /**
     * Sorts the entries of a map by value, largest first.
     *
     * @param inMap word counts
     * @return a new list holding the map's entries in descending value order
     */
    public static List<Map.Entry<String, Integer>> sortMap(Map<String, Integer> inMap) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(inMap.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> first, Map.Entry<String, Integer> second) {
                int firstValue = first.getValue().intValue();
                int secondValue = second.getValue().intValue();
                // Explicit comparison: subtracting could overflow.
                return secondValue > firstValue ? 1 : (secondValue < firstValue ? -1 : 0);
            }
        });
        return entries;
    }

    /**
     * Extracts the keys from a list of map entries, keeping the order.
     *
     * @param list entries, e.g. the result of {@link #sortMap}
     * @return the entry keys in the same order
     */
    public static List<String> convertList(List<? extends Map.Entry<String, ?>> list) {
        List<String> keys = new ArrayList<String>(list.size());
        for (Map.Entry<String, ?> entry : list) {
            keys.add(entry.getKey());
        }
        return keys;
    }

    /**
     * Counts the words of one file.
     *
     * @param filepath file to read
     * @return word to number of occurrences, sorted by word; whatever was
     *         counted so far (possibly nothing) when the file cannot be read
     */
    public static Map<String, Integer> getMapFromFile(String filepath) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filepath));
            String line;
            // Words are matched line by line so the last word of a line is
            // never glued to the first word of the next one.
            while ((line = reader.readLine()) != null) {
                Matcher matcher = WORD.matcher(line);
                while (matcher.find()) {
                    String word = matcher.group();
                    Integer seen = counts.get(word);
                    counts.put(word, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
                }
            }
        } catch (java.io.IOException e) {
            System.err.println("cannot read " + filepath + ": " + e);
        } finally {
            closeQuietly(reader);
        }
        return counts;
    }

    /**
     * Adds the counts of the second map to the first one.
     *
     * @param map1 receives the sums and is returned
     * @param map2 counts to add; left unchanged
     * @return {@code map1}
     */
    public static Map<String, Integer> combineMap(Map<String, Integer> map1, Map<String, Integer> map2) {
        for (Map.Entry<String, Integer> entry : map2.entrySet()) {
            Integer existing = map1.get(entry.getKey());
            if (existing == null) {
                map1.put(entry.getKey(), entry.getValue());
            } else {
                map1.put(entry.getKey(),
                        Integer.valueOf(existing.intValue() + entry.getValue().intValue()));
            }
        }
        return map1;
    }

    /**
     * Counts the words of all regular files directly inside a folder.
     *
     * @param filepath folder to scan
     * @return word to number of occurrences; empty when the path is not a readable folder
     */
    public static Map<String, Integer> getMapFromFolder(String filepath) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        File[] files = new File(filepath).listFiles();
        if (files == null) {
            return counts;
        }
        for (int i = 0; i < files.length; i++) {
            if (files[i].isFile()) {
                combineMap(counts, getMapFromFile(files[i].getAbsolutePath()));
            }
        }
        return counts;
    }

    /**
     * Counts the words of all files in all sub-folders of a folder.
     *
     * @param filepath parent folder whose sub-folders are scanned
     * @return word to number of occurrences; empty when the path is not a readable folder
     */
    public static Map<String, Integer> getTotalMapFromFolder(String filepath) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        File[] folders = new File(filepath).listFiles();
        if (folders == null) {
            return counts;
        }
        for (int i = 0; i < folders.length; i++) {
            if (folders[i].isDirectory()) {
                combineMap(counts, getMapFromFolder(folders[i].getAbsolutePath()));
            }
        }
        return counts;
    }

    /**
     * Writes a map to {@link #trainTempFile}, one {@code key::value} line per entry.
     *
     * @param map entries to store; values are written with their toString form
     */
    public static void convertMapToFile(Map<String, ?> map) {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(trainTempFile));
            for (Map.Entry<String, ?> entry : map.entrySet()) {
                writer.write(entry.getKey() + "::" + entry.getValue());
                writer.newLine();
            }
            writer.flush();
            System.out.println("the trainset at c:\\trainSet.txt");
        } catch (java.io.IOException e) {
            System.err.println("cannot write " + trainTempFile + ": " + e);
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Reads a file written by {@link #convertMapToFile} back into a map.
     *
     * @param filepath file with {@code key::[a, b, c]} lines
     * @return key to list of words; lines without "::" are skipped
     */
    public static Map<String, List<String>> convertFileToMap(String filepath) {
        Map<String, List<String>> result = new HashMap<String, List<String>>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filepath));
            String line;
            while ((line = reader.readLine()) != null) {
                // Words never contain ':', so the last "::" is the separator
                // even if the key (a path) happens to contain one.
                int separator = line.lastIndexOf("::");
                if (separator < 0) {
                    continue;
                }
                result.put(line.substring(0, separator),
                        convertStringToList(line.substring(separator + 2)));
            }
        } catch (java.io.IOException e) {
            System.err.println("cannot read " + filepath + ": " + e);
        } finally {
            closeQuietly(reader);
        }
        return result;
    }

    /**
     * Parses the toString form of a list, e.g. {@code "[a, b, c]"}.
     *
     * @param arrayString bracketed, comma-separated text
     * @return the trimmed elements; empty for null, "[]" or text shorter than two characters
     */
    public static List<String> convertStringToList(String arrayString) {
        List<String> values = new ArrayList<String>();
        if (arrayString == null || arrayString.length() < 2) {
            return values;
        }
        // Drop the surrounding brackets.
        String inner = arrayString.substring(1, arrayString.length() - 1);
        if (inner.trim().length() == 0) {
            return values;
        }
        String[] parts = inner.split(",");
        for (int i = 0; i < parts.length; i++) {
            values.add(parts[i].trim());
        }
        return values;
    }

    /**
     * Writes the most frequent entries of a map, at most {@link #sampleCount},
     * to the file "trainSet.txt" as {@code key--value} lines.
     *
     * @param map word counts
     */
    public static void convertSortedMapToFile(Map<String, Integer> map) {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter("trainSet.txt"));
            List<Map.Entry<String, Integer>> ordered = sortMap(map);
            List<Map.Entry<String, Integer>> top =
                    ordered.subList(0, Math.min(sampleCount, ordered.size()));
            System.out.println(top.size());
            for (Map.Entry<String, Integer> entry : top) {
                writer.write(entry.getKey() + "--" + entry.getValue());
                writer.newLine();
            }
            writer.flush();
        } catch (java.io.IOException e) {
            System.err.println("cannot write trainSet.txt: " + e);
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Builds the training data for a directory of category folders.
     *
     * @param folderpath directory whose sub-folders are the categories
     * @return absolute folder path to the words characteristic of that folder:
     *         its most frequent words that are not among the most frequent
     *         words of all folders together
     */
    public static Map<String, List<String>> getTrainMap(String folderpath) {
        Map<String, List<String>> perFolder = new HashMap<String, List<String>>();
        File[] folders = new File(folderpath).listFiles();
        if (folders == null) {
            return perFolder;
        }
        Map<String, Integer> total = new HashMap<String, Integer>();
        for (int i = 0; i < folders.length; i++) {
            if (!folders[i].isDirectory()) {
                continue;
            }
            String path = folders[i].getAbsolutePath();
            Map<String, Integer> counts = getMapFromFolder(path);
            perFolder.put(path, topKeys(counts));
            combineMap(total, counts);
        }
        List<String> common = topKeys(total);
        for (Map.Entry<String, List<String>> entry : perFolder.entrySet()) {
            entry.setValue(restList(entry.getValue(), common));
        }
        return perFolder;
    }

    /**
     * Returns the elements of the first list that do not occur in the second.
     *
     * @param small list to filter
     * @param large elements to exclude
     * @return a new list with the remaining elements in their original order
     */
    public static <T> List<T> restList(List<T> small, List<?> large) {
        List<T> rest = new ArrayList<T>();
        for (T element : small) {
            if (!large.contains(element)) {
                rest.add(element);
            }
        }
        return rest;
    }

    /**
     * Trains on a directory and stores the result in {@link #trainTempFile}.
     *
     * @param folderPath directory whose sub-folders are the categories
     */
    public static void saveTrainSet(String folderPath) {
        System.out.println("Please wait for 1 minute ........");
        convertMapToFile(getTrainMap(folderPath));
    }

    /**
     * Classifies every file of a folder with the stored training set, prints
     * each result and the percentage of files assigned to the folder itself.
     *
     * @param sortFolder folder whose files are classified
     */
    public static void sortFilesByTrainSet(String sortFolder) {
        Map<String, List<String>> trained = convertFileToMap(trainTempFile);
        System.out.println("sort-------"+sortFolder+"---------");
        File[] files = new File(sortFolder).listFiles();
        if (files == null || files.length == 0) {
            System.out.println("no files found in " + sortFolder);
            return;
        }
        // Training keys are absolute paths, so accept the absolute form of the
        // argument as well as the text exactly as typed.
        String expected = sortFolder.trim();
        String expectedAbsolute = new File(expected).getAbsolutePath();
        int rightcount = 0;
        for (int i = 0; i < files.length; i++) {
            String category = sortFile(files[i].getAbsolutePath(), trained).trim();
            System.out.println("file "+files[i].getAbsolutePath()+" belong to " +category);
            if (category.equals(expected) || category.equals(expectedAbsolute)) {
                rightcount++;
            }
        }
        DecimalFormat df = new DecimalFormat();
        df.setMaximumFractionDigits(2);
        df.setMinimumFractionDigits(2);
        String accuracy = df.format(rightcount * 100.00 / files.length) + "%";
        System.out.println("the accuracy is "+accuracy);
    }

    /**
     * Manual smoke test: dumps the stored training set, classifies one
     * hard-coded sample file and prints the elapsed time.
     */
    public static void trainTest() {
        long startedAt = System.currentTimeMillis();
        Map<String, List<String>> trained = convertFileToMap(trainTempFile);
        for (Map.Entry<String, List<String>> entry : trained.entrySet()) {
            System.out.println(entry.getKey()+"--"+entry.getValue());
            System.out.println(entry.getKey()+"--"+entry.getValue().size());
        }
        System.out.println(sortFile("D:\\sohu3\\articles\\comp.lang.javascript\\555836.txt",trained));
        long finishedAt = System.currentTimeMillis();
        System.out.println(finishedAt-startedAt+"ms");
    }

    /**
     * Picks the category whose trained words occur most often in a file.
     *
     * @param filePath file to classify
     * @param trainMap category name to its trained word list
     * @return the best matching category, or "" when no trained word occurs in the file
     */
    public static String sortFile(String filePath, Map<String, List<String>> trainMap) {
        // Read and tokenise the file once rather than once per category.
        Map<String, Integer> fileCounts = getMapFromFile(filePath);
        int best = 0;
        String category = "";
        for (Map.Entry<String, List<String>> entry : trainMap.entrySet()) {
            int score = getCountFromMap(entry.getValue(), fileCounts);
            if (best < score) {
                best = score;
                category = entry.getKey();
            }
        }
        return category;
    }

    /**
     * Sums how often the trained words occur in a file.
     *
     * @param train       trained words of one category
     * @param mapFromFile word counts of the file, as returned by {@link #getMapFromFile}
     * @return total number of occurrences of the trained words
     */
    public static int getCountFromMap(List<?> train, Map<String, Integer> mapFromFile) {
        int total = 0;
        for (int i = 0; i < train.size(); i++) {
            Integer occurrences = mapFromFile.get(train.get(i).toString());
            if (occurrences != null) {
                total += occurrences.intValue();
            }
        }
        return total;
    }
}