备份试题

某公司的试题,答完后保存一下。
import java.net.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.io.*;
/*
 * UTF-8
 * @author [email protected]
 */
/**
 * Minimal single-threaded TCP server that throttles password-check requests per client IP.
 *
 * <p>Protocol: the client sends one line (the password to check) and the server answers
 * with one line. A request from an IP address that was last seen 10 seconds ago or less
 * is answered with a warning instead of being checked. The time of every handled request,
 * accepted or refused, is remembered in {@link #cMap}.
 */
public class Server {
	/** A request arriving this many milliseconds or less after the previous one is refused. */
	private static final long MIN_INTERVAL_MS = 10000L;

	/**
	 * Last access time (epoch milliseconds) keyed by client IP address.
	 * Kept in memory only; a real deployment would persist it.
	 */
	public static ConcurrentMap<String, Long> cMap = new ConcurrentHashMap<String, Long>();

	private ServerSocket ss;

	/**
	 * Binds to {@code port} and serves clients one at a time until the listening socket fails.
	 * This constructor blocks for the lifetime of the server.
	 *
	 * @param port TCP port to listen on
	 */
	public Server(int port) {
		try {
			ss = new ServerSocket(port);
		} catch (IOException e) {
			// Nothing is connected yet, so the failure can only be reported locally.
			System.err.println("Server could not listen on port " + port + ": " + e.getMessage());
			return;
		}
		try {
			while (true) {
				Socket socket;
				try {
					socket = ss.accept();
				} catch (IOException e) {
					System.err.println("Server stopped accepting connections: " + e.getMessage());
					return;
				}
				try {
					handle(socket);
				} catch (IOException e) {
					// One misbehaving client must not take the whole server down.
					System.err.println("Request failed: " + e.getMessage());
				} finally {
					try {
						socket.close();
					} catch (IOException ignored) {
						// Best-effort cleanup; the connection is finished either way.
					}
				}
			}
		} finally {
			try {
				ss.close();
			} catch (IOException ignored) {
				// Best-effort cleanup on shutdown.
			}
		}
	}

	/**
	 * Serves a single connection: reads the request line, replies, and records the access time.
	 *
	 * @param socket the accepted client connection; closed by the caller
	 * @throws IOException if reading the request fails
	 */
	private void handle(Socket socket) throws IOException {
		String remoteIp = socket.getInetAddress().getHostAddress();
		BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));
		PrintWriter writer = new PrintWriter(socket.getOutputStream(), true);

		Long lastSeen = cMap.get(remoteIp);
		boolean firstLogin = lastSeen == null;
		boolean tooSoon = !firstLogin
				&& System.currentTimeMillis() - lastSeen.longValue() <= MIN_INTERVAL_MS;

		if (tooSoon) {
			System.out.println("warning:间隔时间小于10s,服务不允许"+remoteIp+"继续密码校验请求");
			// Consume (and ignore) the request line: closing a socket that still has unread
			// input can reset the connection before the client sees the warning.
			reader.readLine();
			writer.println("warning:两次登录间隔时间小于10s,服务不允许您的IP:"+remoteIp+"继续密码校验请求");
		} else {
			System.out.print(firstLogin ? "第一次登录," : "间隔时间大于10s,");
			System.out.print("服务允许"+remoteIp+"继续密码校验请求 ,");
			String password = reader.readLine();
			// NOTE: echoing the password is for demonstration only; the real check is omitted.
			System.out.println("您的密码校验请求为:" + password);
			writer.println("Server收到你的信息:"+password+",并进行密码校验");
		}
		// Refused requests also refresh the timestamp, exactly as accepted ones do.
		cMap.put(remoteIp, Long.valueOf(System.currentTimeMillis()));
		writer.close();
		reader.close();
	}

	/**
	 * Tells whether {@code port} is a decimal integer in the valid TCP port range 0..65535.
	 */
	private static boolean testPort(String port){
		try {
			int value = Integer.parseInt(port);
			return value >= 0 && value <= 65535;
		} catch (NumberFormatException e) {
			return false;
		}
	}

	/**
	 * Entry point. The first argument is the port number.
	 * Usage:
	 *   javac Server.java
	 *   java Server 10000
	 */
	public static void main(String[] args) {
		if(args.length<1){
			System.out.println("请输入端口号,比如执行 java Server 10000");
		}else if(!testPort(args[0])){
			System.out.println("第一个参数请输入正确的端口号");
		}else{
			System.out.println("Server starting .. ");
			new Server(Integer.parseInt(args[0]));
		}
	}
}


import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.Socket;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/*
 * UTF-8
 * @author [email protected]
 */
/**
 * One-shot TCP client: connects, sends a single line, prints the single-line reply, disconnects.
 */
public class Client {
	/** Four dot-separated groups of one to three digits; the 0..255 range is checked separately. */
	private static final Pattern IPV4 =
			Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})");

	Socket socket;
	BufferedReader in;
	PrintWriter out;

	/**
	 * Connects to the server, sends the first line of {@code password} and prints the reply.
	 * Connection problems are reported on standard output and never thrown.
	 *
	 * @param ipaddress address of the server
	 * @param port      TCP port of the server
	 * @param password  the request text; only its first line is sent
	 */
	public Client(String ipaddress,int port,String password) {
		try {
			System.out.println("尝试连接到服务 "+ipaddress+":"+port);
			socket = new Socket(ipaddress, port);
			System.out.println("客户端发送请求信息:"+password);
			out = new PrintWriter(socket.getOutputStream(), true);
			out.println(firstLine(password));
			in = new BufferedReader(new InputStreamReader(socket.getInputStream()));
			System.out.println(in.readLine());
		} catch (IOException e) {
			// The connection may never have been opened, so only local reporting is possible.
			System.out.println("O,my god,服务端没有连接上,请确认服务已经开启,且您输入的ip和端口号正确");
		} finally {
			if (out != null) {
				out.close();
			}
			try {
				if (in != null) {
					in.close();
				}
			} catch (IOException ignored) {
				// Best-effort cleanup.
			}
			try {
				if (socket != null) {
					socket.close();
				}
			} catch (IOException ignored) {
				// Best-effort cleanup.
			}
		}
	}

	/**
	 * Returns the text before the first line terminator, or an empty string for null/empty
	 * input, because the protocol carries exactly one line per request.
	 */
	private static String firstLine(String text) {
		if (text == null) {
			return "";
		}
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			if (c == '\n' || c == '\r') {
				return text.substring(0, i);
			}
		}
		return text;
	}

	/**
	 * Tells whether {@code ip} is a dotted-quad IPv4 address with every octet in 0..255.
	 */
	private static boolean testIP(String ip){
		Matcher mat = IPV4.matcher(ip);
		if (!mat.matches()) {
			return false;
		}
		for (int group = 1; group <= 4; group++) {
			if (Integer.parseInt(mat.group(group)) > 255) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Tells whether {@code port} is a decimal integer in the valid TCP port range 0..65535.
	 */
	private static boolean testPort(String port){
		try {
			int value = Integer.parseInt(port);
			return value >= 0 && value <= 65535;
		} catch (NumberFormatException e) {
			return false;
		}
	}

	/**
	 * Entry point. Three arguments: 1. IPv4 address, 2. port, 3. message.
	 * Usage:
	 *   javac Client.java
	 *   java Client 192.168.1.116 10000 messages
	 */
	public static void main(String[] args) {
		if(args.length<3){
			System.out.println("请输入参数:服务ip,服务端口,消息,比如执行 java Client 192.168.1.116 10000 messages");
		}else if(!testIP(args[0])){
			System.out.println("第一个参数请输入正确的ipv4地址,比如192.168.1.116");
		}else if(!testPort(args[1])){
			System.out.println("第二个参数请输入正确的端口号,比如10000");
		}else{
			new Client(args[0],Integer.parseInt(args[1]),args[2]);
		}
	}
}

-----------------------------------------------------------------
需要commons-logging-1.1.1.jar,httpcore-4.1.jar,httpclient-4.1.jar
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
/*
*GBK
[email protected]
*/
/**
 * A small ApacheBench look-alike: issues {@code -n} GET requests against one URL in batches
 * of {@code -c} concurrent threads and prints a summary in the style of {@code ab}.
 */
public class ab {
	private static final String welcome="This is haoning's ApacheBench, Version 0.1 <$Revision: 000001 $>\n"
		+"Copyright 2011 haoning, [email protected] \n"
		+"Licensed to haoning, http://www.sohu.com/\n";// banner
	private static final String help=" Usage: java ab [options] [http[s]://]hostname[:port]/path"
									+"\n-n requests     Number of requests to perform"
									+"\n-c concurrency  Number of multiple requests to make"
									+"\n-v verbosity    How much troubleshooting info to print";
	private static final String error="ab: wrong number of arguments \n";
	/** One or more ASCII digits; the empty string is deliberately not numeric. */
	private static final Pattern DIGITS=Pattern.compile("[0-9]+");

	public static volatile  int count=1;// successful requests; starts at -n and drops by one per failure
	public static volatile  int errorcount=0;// failed requests
	public static volatile  int concurrency=1;// threads per batch, -c
	public static volatile  int verbosity=1;// verbosity level, -v
	public static volatile  String requestHeaderString=null;// request header text (for -v)
	public static volatile  String responseHeaderString=null;// response header text (for -v)
	public static volatile  HttpGet publichttpget=null;// first request created; source of URL details for the report
	public static volatile  HttpResponse publicresponse =null;// first response received; source of header details for the report
	public static volatile  int documentLength=0;
	public static volatile  int responseheaderslength=0;// length of the response headers of the last handled response
	public static volatile  int requestheaderslength=0;// length of the request line plus Host header
	public static volatile  int responsecontentlength=0;// length of the last handled response body

	/**
	 * Tells whether {@code str} consists solely of one or more ASCII digits.
	 */
	public static boolean isNumeric(String str){
		return str!=null && DIGITS.matcher(str).matches();
	}

	/**
	 * Looks up the value that follows {@code key} in {@code args}.
	 *
	 * @return the value after the last occurrence of {@code key}, or "1" when the key is
	 *         absent, has no following argument, or its value is not a non-negative int
	 */
	public static String getValue(String[] args,String key){
		String value="1";
		// Stop one short of the end: a key in the last position has no value to read.
		for(int i=0;i+1<args.length;i++){
			if(args[i].equals(key)){
				value=args[i+1];
			}
		}
		if(!isNumeric(value)){
			return "1";
		}
		try {
			Integer.parseInt(value);// all digits but possibly too large for an int
		} catch (NumberFormatException e) {
			return "1";
		}
		return value;
	}

	/**
	 * Validates the command line, which must look like {@code [-key value]... url}.
	 *
	 * @return 1 for a lone URL, 0 for a valid option list, or a negative code after
	 *         printing the usage text
	 */
	public static int validateArgs(String[] args){
		List<String> keys=new ArrayList<String>();
		keys.add("-v");// verbosity
		keys.add("-c");// concurrency
		keys.add("-n");// number of requests
		if(args.length<1){// at least the URL is required
			System.out.println(error+help);
			return -1;
		}
		if(args.length==1){// URL only
			return 1;
		}
		if(args.length%2==0){// key/value pairs plus the URL always give an odd count
			System.out.println(error+help);
			return -2;
		}
		for(int i=0;i<args.length-1;i=i+2){// every option name must be a known key
			if(!keys.contains(args[i])){
				System.out.println(error+help);
				return -3;
			}
		}
		return 0;
	}

	/**
	 * Builds the request header text, records its length in {@link #requestheaderslength}
	 * and prints it when verbosity is above 2.
	 */
	public static void getRequestHeaderString(HttpGet httpget){
		String requestLine=HttpGet.METHOD_NAME+" "+httpget.getURI().getRawPath()+" "+httpget.getProtocolVersion()+"\n"
				+"Host: " + httpget.getURI().getAuthority();
		requestheaderslength=requestLine.length();// only the request line and Host header are counted
		if(verbosity>2){
			StringBuilder sb=new StringBuilder();
			sb.append("INFO: POST header ==\n");
			sb.append("---\n");
			sb.append(requestLine).append("\n");
			sb.append("User-Agent: haoning's ApacheBench/0.1\n");
			sb.append("Accept: */* \n\n");
			System.out.println(sb.toString());
		}
	}

	/**
	 * Builds the response header text, records header and body lengths in
	 * {@link #responseheaderslength} and {@link #responsecontentlength}, and prints the text
	 * when verbosity is above 2.
	 */
	public static void getResponseHeaderString(HttpResponse response,String responseContent) throws IllegalStateException, IOException{
		StringBuilder sb=new StringBuilder();
		sb.append("---\n");
		sb.append("LOG: header received:\n");
		sb.append(response.getStatusLine()+"\n");
		Header[] headers=response.getAllHeaders();
		int headerlength=0;
		for(int i=0;i<headers.length;i++){
			String line=headers[i].toString();
			sb.append(line+"\n");
			headerlength+=line.length();
		}
		responseheaderslength=headerlength;
		sb.append("Connection: close\n\n");
		sb.append(responseContent+"\n");
		responsecontentlength=responseContent.length();
		sb.append("LOG: Response Code="+response.getStatusLine().getStatusCode()+"\n");
		sb.append("..done \n\n");
		if(verbosity>2){
			System.out.println(sb.toString());
		}
	}

	/**
	 * Runs one batch of {@code thisconcurrency} simultaneous GET requests and waits for all
	 * of them to finish.
	 *
	 * @param thisconcurrency number of threads in this batch; nothing happens when it is not positive
	 * @param url             address to request
	 */
	public static void MultiThreadExecution(int thisconcurrency,String url) {
		if(thisconcurrency<=0){
			return;
		}
		ThreadSafeClientConnManager cm = new ThreadSafeClientConnManager();
		cm.setMaxTotal(Math.max(100, thisconcurrency));
		// All requests target one host; the default per-route limit of 2 would otherwise
		// throttle the batch regardless of the requested concurrency.
		cm.setDefaultMaxPerRoute(thisconcurrency);
		HttpClient httpclient = new DefaultHttpClient(cm);
		try {
			AbThread[] threads = new AbThread[thisconcurrency];
			for (int i = 0; i < threads.length; i++) {
				HttpGet httpget = new HttpGet(url);
				if(publichttpget==null){
					publichttpget=httpget;// kept for the final report
				}
				threads[i] = new AbThread(httpclient, httpget, i + 1);
			}
			for (int j = 0; j < threads.length; j++) {
				threads[j].start();
			}
			for (int j = 0; j < threads.length; j++) {
				threads[j].join();
			}
		} catch(InterruptedException ex){
			Thread.currentThread().interrupt();// preserve the interrupt for the caller
			System.out.println("MultiThreadExecution---------------exception");
			ex.printStackTrace();
		} catch(Exception ex){
			System.out.println("MultiThreadExecution---------------exception");
			ex.printStackTrace();
		}finally {
			httpclient.getConnectionManager().shutdown();
		}
	}

	/**
	 * Accounts for one failed request. Synchronized because {@code volatile} alone does not
	 * make the decrement/increment pair atomic across worker threads.
	 */
	private static synchronized void recordFailure(){
		count--;
		errorcount++;
	}

	/**
	 * Returns the body length to report: the length declared by the first response's entity,
	 * or the measured length of the last body when none is declared.
	 */
	private static long reportedDocumentLength(){
		HttpEntity entity=publicresponse.getEntity();
		long declared=entity==null?-1:entity.getContentLength();
		return declared>=0?declared:responsecontentlength;
	}

	/**
	 * Entry point. Defaults when an option is missing: -n 1, -c 1, -v 1.
	 */
	public static void main(String[] args)  {
		System.out.println(welcome);
		if(validateArgs(args)<0){
			return;
		}
		verbosity=Integer.parseInt(getValue(args,"-v"));
		count=Integer.parseInt(getValue(args,"-n"));
		concurrency=Integer.parseInt(getValue(args,"-c"));
		if(count<1||concurrency<1){
			// Zero would divide by zero below and could never issue a request anyway.
			System.out.println("ab: -n and -c must be positive integers");
			return;
		}
		if(count<concurrency){
			System.out.println("ab: Cannot use concurrency level greater than total number of requests");
			return;
		}
		String url=args[args.length-1];
		System.out.println("Benchmarking "+url+" (be patient)....done\n\n");
		int loopcount=count/concurrency;// number of full batches
		int lastconcurrency=count%concurrency;// size of the final partial batch
		long t1=System.currentTimeMillis();
		for(int i=0;i<loopcount;i++){
			MultiThreadExecution(concurrency,url);
		}
		MultiThreadExecution(lastconcurrency,url);
		long t2=System.currentTimeMillis();
		long elapsedMs=t2-t1;
		double seconds=elapsedMs/1000.00;
		if(publicresponse==null){
			return;// no request succeeded, nothing to report
		}
		String serverSoftware="unknow server";
		Header[] headers=publicresponse.getAllHeaders();
		for(int i=0;i<headers.length;i++){
			if(headers[i].getName().trim().equals("Server")){
				serverSoftware=headers[i].getValue();
			}
		}
		int port=publichttpget.getURI().getPort();
		if(port==-1){// no explicit port in the URL: use the scheme's default
			port="https".equalsIgnoreCase(publichttpget.getURI().getScheme())?443:80;
		}
		long docLength=reportedDocumentLength();
		int total=count+errorcount;// every attempted request, successful or not
		System.out.println("Server Software:        " + serverSoftware);
		System.out.println("Server Hostname:        " + publichttpget.getURI().getHost());
		System.out.println("Server Port:            " + port);
		System.out.println();
		System.out.println("Document Path:          " + publichttpget.getURI().getPath());
		System.out.println("Document Length:        " + docLength+" bytes");
		System.out.println();
		System.out.println();
		System.out.println("Concurrency Level:      "+concurrency);
		System.out.println("Time taken for tests:   "+seconds+" seconds");
		System.out.println("Complete requests:      "+count);
		System.out.println("Failed requests:        "+errorcount);
		System.out.println("Write errors:           "+errorcount);
		// long arithmetic: the per-request byte count times the request count can exceed an int
		System.out.println("Total transferred:      "+((long)responsecontentlength+responseheaderslength+requestheaderslength)*count+" bytes");
		System.out.println("HTML transferred:       "+docLength*count+" bytes");
		System.out.println("Requests per second:    "+total/seconds+" [#/sec] (mean)");
		// Both figures are in milliseconds, following ApacheBench's definitions.
		System.out.println("Time per request:       "+(double)concurrency*elapsedMs/total+" [ms] (mean)");
		System.out.println("Time per request:       "+(double)elapsedMs/total+" [ms] (mean, across all concurrent requests)");
		System.out.println();
	}

	/**
	 * Worker thread that performs a single GET request and feeds the shared statistics.
	 */
	static class AbThread extends Thread {
		private final HttpClient httpClient;
		private final HttpContext context;
		private final HttpGet httpget;
		private final int id;

		public AbThread(HttpClient httpClient, HttpGet httpget, int id) {
			this.httpClient = httpClient;
			this.context = new BasicHttpContext();
			this.httpget = httpget;
			this.id = id;
		}

		@Override
		public void run() {
			try {
				HttpResponse response = httpClient.execute(httpget, context);
				if(publicresponse==null){
					publicresponse=response;// kept for the final report
				}
				HttpEntity entity = response.getEntity();
				if (entity != null) {
					// The body must be consumed so the connection can be released.
					byte[] bytes = EntityUtils.toByteArray(entity);
					getRequestHeaderString(httpget);
					getResponseHeaderString(response,new String(bytes));
				}
			} catch (Exception e) {
				recordFailure();
				httpget.abort();
				System.out.println("ab: invalid URL");
				System.out.println(help);
			}
		}
	}
}

----------------------------------------------------------
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/*
 * GBK
 * [email protected]
 */
/**
 * Naive keyword-based text classifier.
 *
 * <p>Training ({@code traintxt}): every sub-folder of the articles folder is a category; its
 * most frequent words, minus the words that are also among the most frequent overall, become
 * the category's keywords, which are saved to {@link #trainTempFile}.
 * Classification ({@code sorttxt}): each file is assigned to the category whose keywords occur
 * most often in it.
 */
public class SortTxt {
	public static final int sampleCount=1000;// number of top words kept per category; larger is more accurate
	public static final String trainTempFile="c:\\trainSet.txt";// where the training set is stored
	/** A word is a run of ASCII letters and digits. */
	private static final Pattern WORD=Pattern.compile("[0-9a-zA-Z]+");

	/**
	 * Entry point.
	 * {@code java SortTxt traintxt <articlesFolder>} builds the training set;
	 * {@code java SortTxt sorttxt <sortFolder>} classifies every file in a folder and prints
	 * the categories and the accuracy.
	 */
	public static void main(String[] args) {
		if(args.length>=2 && args[0].equals("traintxt")){
			saveTrainSet(args[1]);
		}else if(args.length>=2 && args[0].equals("sorttxt")){
			sortFilesByTrainSet(args[1]);
		}else{
			System.out.println("please use 'java SortTxt traintxt <articlesFolder>' for example :'java SortTxt traintxt D:\\sohu3\\articles'");
			System.out.println("or 'java SortTxt sorttxt <sortFolder>' for example:'java SortTxt sorttxt D:\\sohu3\\articles\\comp.lang.javascript'");
		}
	}

	/**
	 * Reads a count stored in one of the word maps, whether it is a Number or its text form.
	 */
	private static int intValueOf(Object value) {
		if (value instanceof Number) {
			return ((Number) value).intValue();
		}
		return Integer.parseInt(value.toString());
	}

	/**
	 * Closes a stream, ignoring failures; used for cleanup in finally blocks.
	 */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource != null) {
			try {
				resource.close();
			} catch (java.io.IOException ignored) {
				// Nothing useful can be done when cleanup itself fails.
			}
		}
	}

	/**
	 * Returns the keys of {@code wordCounts} ordered by descending count, at most
	 * {@link #sampleCount} of them.
	 */
	private static List topKeys(Map wordCounts) {
		List ordered = convertList(sortMap(wordCounts));
		// A small corpus may hold fewer distinct words than the sample size.
		return new ArrayList(ordered.subList(0, Math.min(sampleCount, ordered.size())));
	}

	/**
	 * Returns the entries of {@code inMap} as a list sorted by value, largest first.
	 */
	public static List sortMap(Map inMap) {
		List list = new ArrayList(inMap.entrySet());
		Collections.sort(list, new Comparator() {
			public int compare(Object o1, Object o2) {
				int first = intValueOf(((Map.Entry) o1).getValue());
				int second = intValueOf(((Map.Entry) o2).getValue());
				// Explicit comparison instead of subtraction, which can overflow.
				return second < first ? -1 : (second == first ? 0 : 1);
			}
		});
		return list;
	}

	/**
	 * Converts a list of map entries into the list of their keys, keeping the order.
	 */
	public static List convertList(List list) {
		List newlist = new ArrayList(list.size());
		for (Iterator it = list.iterator(); it.hasNext();) {
			newlist.add(((Map.Entry) it.next()).getKey());
		}
		return newlist;
	}

	/**
	 * Counts the words of one file.
	 *
	 * @return a sorted map from word to number of occurrences; empty when the file cannot be
	 *         read (the error is printed)
	 */
	public static Map getMapFromFile(String filepath) {
		TreeMap myTreeMap = new TreeMap();
		BufferedReader buf = null;
		try {
			buf = new BufferedReader(new FileReader(filepath));
			String line;
			// Matching line by line keeps the last word of one line separate from the
			// first word of the next.
			while ((line = buf.readLine()) != null) {
				Matcher matcher = WORD.matcher(line);
				while (matcher.find()) {
					String word = matcher.group();
					Integer seen = (Integer) myTreeMap.get(word);
					myTreeMap.put(word, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
				}
			}
		} catch (java.io.IOException e) {
			e.printStackTrace();
		} finally {
			closeQuietly(buf);
		}
		return myTreeMap;
	}

	/**
	 * Adds the counts of {@code map2} into {@code map1}; counts of shared keys are summed.
	 *
	 * @return {@code map1}, which is modified in place; {@code map2} is left untouched
	 */
	public static Map combineMap(Map map1,Map map2){
		for (Iterator it = map2.entrySet().iterator(); it.hasNext();) {
			Map.Entry entry = (Map.Entry) it.next();
			Object key = entry.getKey();
			Object existing = map1.get(key);
			if (existing == null) {
				map1.put(key, entry.getValue());
			} else {
				map1.put(key, Integer.valueOf(intValueOf(existing) + intValueOf(entry.getValue())));
			}
		}
		return map1;
	}

	/**
	 * Counts the words of every regular file directly inside a folder.
	 *
	 * @return map from word to total occurrences; empty when {@code filepath} is not a
	 *         readable folder
	 */
	public static Map getMapFromFolder(String filepath){
		Map map=new HashMap();
		File[] files = new File(filepath).listFiles();
		if (files == null) {// not a directory, or it cannot be listed
			return map;
		}
		for (int i = 0; i < files.length; i++) {
			if (files[i].isFile()) {
				map=combineMap(map,getMapFromFile(files[i].getAbsolutePath()));
			}
		}
		return map;
	}

	/**
	 * Counts the words of all files in all sub-folders of a folder.
	 */
	public static Map getTotalMapFromFolder(String filepath) {
		Map map=new HashMap();
		File[] files = new File(filepath).listFiles();
		if (files == null) {
			return map;
		}
		for (int i = 0; i < files.length; i++) {
			map=combineMap(map,getMapFromFolder(files[i].getAbsolutePath()));
		}
		return map;
	}

	/**
	 * Writes a map to {@link #trainTempFile}, one {@code key::value} pair per line.
	 */
	public static void convertMapToFile(Map map){
		BufferedWriter bufw = null;
		try {
			bufw = new BufferedWriter(new FileWriter(trainTempFile));
			for (Iterator iter = map.entrySet().iterator(); iter.hasNext();) {
				Map.Entry entry = (Map.Entry) iter.next();
				bufw.write((String) entry.getKey() + "::" + entry.getValue());
				bufw.newLine();
			}
			bufw.close();
			System.out.println("the trainset at " + trainTempFile);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(bufw);
		}
	}

	/**
	 * Reads a file written by {@link #convertMapToFile(Map)} back into a map from key to
	 * list of words. Lines without a {@code ::} separator are skipped.
	 */
	public static Map convertFileToMap(String filepath){
		Map fileMap=new HashMap();
		BufferedReader buf = null;
		try {
			buf = new BufferedReader(new FileReader(filepath));
			String line;
			while ((line = buf.readLine()) != null) {
				String[] parts = line.split("::");
				if (parts.length < 2) {
					continue;// malformed line; keep reading the rest
				}
				fileMap.put(parts[0], convertStringToList(parts[1]));
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(buf);
		}
		return fileMap;
	}

	/**
	 * Parses the text form of a list, e.g. {@code "[a, b, c]"}, into a list of trimmed strings.
	 *
	 * @return the elements; an empty list for {@code "[]"} or for input shorter than two characters
	 */
	public static List convertStringToList(String arrayString){
		List value=new ArrayList();
		if (arrayString == null || arrayString.length() < 2) {
			return value;
		}
		String inner = arrayString.substring(1, arrayString.length()-1);// drop the surrounding []
		if (inner.trim().length() == 0) {
			return value;// an empty list has no elements, not one empty element
		}
		String[] array=inner.split(",");
		for(int i=0;i<array.length;i++){
			value.add(array[i].trim());
		}
		return value;
	}

	/**
	 * Writes the {@link #sampleCount} most frequent entries of a map to the file
	 * {@code trainSet.txt} in the working directory, one {@code key--value} pair per line.
	 */
	public static void convertSortedMapToFile(Map map){
		BufferedWriter bufw = null;
		try {
			bufw = new BufferedWriter(new FileWriter("trainSet.txt"));
			List theList=sortMap(map);
			theList=theList.subList(0, Math.min(sampleCount, theList.size()));
			System.out.println(theList.size());
			for (int i = 0; i < theList.size(); i++) {
				Map.Entry entry = (Map.Entry) theList.get(i);
				bufw.write(entry.getKey() + "--" + entry.getValue());
				bufw.newLine();
			}
			bufw.close();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(bufw);
		}
	}

	/**
	 * Builds the training result for a folder of category sub-folders.
	 *
	 * @return map from the absolute path of each category folder to the list of words that
	 *         are among that folder's most frequent words but not among the most frequent
	 *         words overall; empty when {@code folderpath} is not a readable folder
	 */
	public static Map getTrainMap(String folderpath){
		Map maps=new HashMap();
		File[] files = new File(folderpath).listFiles();
		if (files == null) {
			return maps;
		}
		Map total=new HashMap();
		List categories=new ArrayList();
		for (int i = 0; i < files.length; i++) {
			if (!files[i].isDirectory()) {
				continue;// only sub-folders are categories
			}
			String path = files[i].getAbsolutePath();
			Map temp=getMapFromFolder(path);
			maps.put(path, topKeys(temp));
			total=combineMap(total,temp);
			categories.add(path);
		}
		List totalList=topKeys(total);// most frequent words across all categories
		for (int i = 0; i < categories.size(); i++) {
			Object path = categories.get(i);
			maps.put(path, restList((List)maps.get(path),totalList));
		}
		return maps;
	}

	/**
	 * Returns the elements of {@code small} that do not occur in {@code large}, in order.
	 */
	public static List restList(List small,List large){
		java.util.Set excluded = new java.util.HashSet(large);// constant-time membership test
		List rest=new ArrayList();
		for (Object s1 : small) {
			if (!excluded.contains(s1)) {
				rest.add(s1);
			}
		}
		return rest;
	}

	/**
	 * Trains on a folder of category sub-folders and saves the result to {@link #trainTempFile}.
	 */
	public static void saveTrainSet(String folderPath){
		System.out.println("Please wait for 1 minute ........");
		Map map=getTrainMap(folderPath);
		convertMapToFile(map);
	}

	/**
	 * Classifies every file in {@code sortFolder}, prints each result, and prints the share
	 * of files that were assigned to {@code sortFolder} itself.
	 */
	public static void sortFilesByTrainSet(String sortFolder){
		Map map=convertFileToMap(trainTempFile);
		System.out.println("sort-------"+sortFolder+"---------");
		File filesDir = new File(sortFolder.trim());
		File[] files = filesDir.listFiles();
		if (files == null) {
			System.out.println("not a readable folder: " + sortFolder);
			return;
		}
		// Categories are stored as absolute paths, so compare against the absolute form of
		// the argument rather than the raw text the user typed.
		String expected = filesDir.getAbsolutePath();
		int rightcount=0;// correctly classified files
		for (int i = 0; i < files.length; i++) {
			String aa=sortFile(files[i].getAbsolutePath(),map).trim();
			System.out.println("file "+files[i].getAbsolutePath()+" belong to " +aa);
			if(aa.equals(expected)){
				rightcount++;
			}
		}
		DecimalFormat df = new DecimalFormat();
		df.setMaximumFractionDigits(2);
		df.setMinimumFractionDigits(2);
		double percent = files.length == 0 ? 0.0 : rightcount * 100.00 / files.length;
		String accuracy= df.format(percent) + "%";
		System.out.println("the accuracy is "+accuracy);
	}

	/**
	 * Manual smoke test: dumps the saved training set, classifies one hard-coded sample file
	 * and prints the elapsed time.
	 */
	public static void trainTest() {
		long l1=System.currentTimeMillis();
		Map map=convertFileToMap(trainTempFile);
		for (Iterator it = map.entrySet().iterator(); it.hasNext();) {
			Map.Entry entry = (Map.Entry) it.next();
			Object key = entry.getKey();
			Object value = entry.getValue();
			System.out.println(key+"--"+value);
			System.out.println(key+"--"+((List) value).size());
		}
		System.out.println(sortFile("D:\\sohu3\\articles\\comp.lang.javascript\\555836.txt",map));
		long l2=System.currentTimeMillis();
		System.out.println(l2-l1+"ms");
	}

	/**
	 * Classifies one file.
	 *
	 * @param filePath path of the file to classify
	 * @param trainMap map from category (folder path) to that category's keyword list
	 * @return the category whose keywords occur most often in the file, or an empty string
	 *         when no keyword occurs at all
	 */
	public static String sortFile(String filePath,Map trainMap){
		Map wordCounts = getMapFromFile(filePath);// read the file once, not once per category
		int top=0;
		String returnValue="";
		for (Iterator it = trainMap.entrySet().iterator(); it.hasNext();) {
			Map.Entry entry = (Map.Entry) it.next();
			int tmp=getCountFromMap((List) entry.getValue(),wordCounts);
			if(top<tmp){// strictly greater: the first category seen wins a tie
				top=tmp;
				returnValue=(String) entry.getKey();
			}
		}
		return returnValue;
	}

	/**
	 * Sums, over all keywords in {@code train}, how often each occurs according to
	 * {@code mapFromFile}.
	 */
	public static int getCountFromMap(List train,Map mapFromFile){
		int count=0;
		for(int i=0;i<train.size();i++){
			Object occurrences = mapFromFile.get(train.get(i).toString());
			if(occurrences!=null){
				count+=intValueOf(occurrences);
			}
		}
		return count;
	}
}

你可能感兴趣的:(JavaScript,c,socket,正则表达式,REST)