找出多个文本中频率高的单词(3)

采用 java.util.concurrent 中的 Future + ExecutorService 实现

FileManager.java

package com.anders.future;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Manage files and offer single for every thread
 * 
 * @author Anders
 * 
 */
/**
 * Hands out the files of the configured directory one at a time to worker
 * threads and counts the English words contained in a file.
 *
 * <p>The directory is named by the {@code DirName} property and looked up on
 * the classpath; only files whose name fully matches the regular expression in
 * the {@code FileType} property are served. Class initialization fails if the
 * directory resource cannot be found.
 *
 * @author Anders
 */
public class FileManager {

	/** Files still to be served; built once when the class is initialized. */
	private static List<File> fileList;
	/** Position in {@link #fileList} of the next file to hand out. */
	private static int index = 0;

	static {
		String dirPath = PropertiesUtil.get("DirName");
		String path = FileManager.class.getClassLoader().getResource(dirPath).getPath();
		fileList = getFiles(path);
	}

	/**
	 * Returns the next file that has not been handed out yet and prints its name
	 * to standard output. The method is synchronized so that concurrent callers
	 * never receive the same file.
	 *
	 * @return the next file, or {@code null} once every file has been handed out
	 */
	public synchronized static File getFile() {
		if (index == fileList.size()) {
			return null;
		}
		File file = fileList.get(index);
		index++;
		System.out.println(file.getName());
		return file;
	}

	/**
	 * Lists the entries of {@code dirPath} whose name matches the {@code FileType}
	 * pattern.
	 *
	 * @param dirPath path of the directory to scan
	 * @return the matching files; empty if the path is not an existing directory
	 *         or the directory cannot be listed
	 */
	private static List<File> getFiles(String dirPath) {

		File dir = new File(dirPath);
		if (!dir.exists() || !dir.isDirectory()) {
			return Collections.emptyList();
		}

		// listFiles() returns null when an I/O error occurs while listing.
		File[] files = dir.listFiles();
		if (files == null) {
			return Collections.emptyList();
		}

		// Keep only the files whose whole name matches the configured pattern.
		Pattern pattern = Pattern.compile(PropertiesUtil.get("FileType"));
		List<File> list = new ArrayList<File>();

		for (File file : files) {
			Matcher matcher = pattern.matcher(file.getName());
			if (matcher.matches()) {
				list.add(file);
			}
		}

		return list;
	}

	/**
	 * Reads {@code file} through a {@link FileChannel} in 1024-byte chunks and adds
	 * the number of occurrences of every word to {@code map}. A word is a maximal
	 * run of ASCII letters and is stored in lower case.
	 *
	 * <p>Any exception raised while reading is printed and otherwise ignored, so
	 * {@code map} may then hold only the words seen before the failure.
	 *
	 * @param file the file to read
	 * @param map  word-to-count map that is updated in place
	 */
	public static void parseFile(File file, Map<String, Integer> map) {
		FileInputStream ins = null;
		try {
			ins = new FileInputStream(file);
			FileChannel fIns = ins.getChannel();
			ByteBuffer buffer = ByteBuffer.allocate(1024);
			// Lives across reads so a word cut by a chunk boundary is not split or lost.
			StringBuilder word = new StringBuilder();

			while (fIns.read(buffer) != -1) {
				buffer.flip();
				buffer2word(buffer, word, map);
				buffer.clear();
			}
			// The file may end in the middle of a word.
			flushWord(word, map);

		} catch (Exception e) {
			// Best effort: report the problem and let the caller continue with other files.
			e.printStackTrace();
		} finally {
			try {
				if (ins != null) {
					// Closing the stream also closes the channel obtained from it.
					ins.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

	}

	/**
	 * Consumes the readable bytes of {@code buffer}: letters are appended in lower
	 * case to {@code word}, any other byte ends the current word.
	 *
	 * @param buffer chunk to scan, already flipped for reading
	 * @param word   the word under construction, carried over between chunks
	 * @param map    word-to-count map that is updated in place
	 */
	private static void buffer2word(ByteBuffer buffer, StringBuilder word, Map<String, Integer> map) {
		while (buffer.hasRemaining()) {
			byte b = buffer.get();
			if (isEnglishChar(b)) {
				// Fold case on the ASCII byte itself; this is independent of the default locale.
				word.append((char) (b >= 'A' && b <= 'Z' ? b + ('a' - 'A') : b));
			} else {
				flushWord(word, map);
			}
		}
	}

	/** Counts the pending word, if there is one, and empties the builder. */
	private static void flushWord(StringBuilder word, Map<String, Integer> map) {
		if (word.length() > 0) {
			word2map(word.toString(), map);
			word.setLength(0);
		}
	}

	/** Increments the count stored for {@code word}, starting at 1 for a new word. */
	private static void word2map(String word, Map<String, Integer> map) {
		Integer count = map.get(word);
		map.put(word, count == null ? 1 : count + 1);
	}

	/** Tells whether {@code b} is an ASCII letter, {@code A-Z} or {@code a-z}, bounds included. */
	private static boolean isEnglishChar(byte b) {
		return (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z');
	}

}

Main.java

package com.anders.future;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

/**
 * Entry point: starts {@code ThreadNumber} worker tasks on a fixed thread pool,
 * lets each of them count the words of the files it obtains from
 * {@link FileManager}, then merges the per-task results and prints them.
 */
public class Main {

	/**
	 * Worker task: keeps asking {@link FileManager} for files until none is left
	 * and returns the word counts of everything it parsed.
	 */
	private static class WordCountTask implements Callable<Map<String, Integer>> {

		@Override
		public Map<String, Integer> call() throws Exception {
			Map<String, Integer> counts = new HashMap<String, Integer>();
			for (File next = FileManager.getFile(); next != null; next = FileManager.getFile()) {
				FileManager.parseFile(next, counts);
			}
			return counts;
		}
	}

	public static void main(String[] args) {

		final int threadNumber = Integer.parseInt(PropertiesUtil.get("ThreadNumber"));
		final ExecutorService es = Executors.newFixedThreadPool(threadNumber);

		// Generic arrays cannot be created directly, hence the raw Future[].
		@SuppressWarnings("unchecked")
		Future<Map<String, Integer>>[] futures = new Future[threadNumber];

		try {
			int slot = 0;
			while (slot < threadNumber) {
				futures[slot++] = es.submit(new WordCountTask());
			}

			display(mergeThreadMap(futures));
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			es.shutdown();
		}

	}

	/**
	 * Waits for every task in turn and adds its counts to a single map.
	 *
	 * @param futures the pending results, one per submitted task
	 * @return the summed count of every word reported by any task
	 * @throws InterruptedException if the calling thread is interrupted while waiting
	 * @throws ExecutionException   if one of the tasks failed
	 */
	private static Map<String, Integer> mergeThreadMap(Future<Map<String, Integer>>[] futures)
			throws InterruptedException, ExecutionException {
		Map<String, Integer> totals = new HashMap<String, Integer>();

		for (int i = 0; i < futures.length; i++) {
			Map<String, Integer> partial = futures[i].get();

			for (Map.Entry<String, Integer> entry : partial.entrySet()) {
				String word = entry.getKey();
				Integer partialCount = entry.getValue();
				Integer soFar = totals.get(word);

				if (soFar != null) {
					totals.put(word, partialCount + soFar);
				} else {
					totals.put(word, partialCount);
				}
			}
		}

		return totals;
	}

	/** Prints one line per entry: the word, the separator {@code "   ,"} and its count. */
	private static void display(Map<String, Integer> map) {
		for (Map.Entry<String, Integer> entry : map.entrySet()) {
			System.out.println(entry.getKey() + "   ," + entry.getValue());
		}
	}

}

PropertiesUtil.java

package com.anders.future;

import java.io.IOException;
import java.util.Properties;

/**
 * Read-only access to the settings stored in {@code config.properties}, which
 * is loaded from the classpath once, when this class is initialized.
 *
 * <p>If the file is missing or cannot be read, class initialization fails with
 * an {@link ExceptionInInitializerError} whose cause is an
 * {@link IllegalStateException} describing the problem.
 */
public class PropertiesUtil {

	private static final Properties prop = new Properties();

	static {
		java.io.InputStream in = PropertiesUtil.class.getClassLoader().getResourceAsStream("config.properties");
		// getResourceAsStream returns null, rather than throwing, when the resource is absent.
		if (in == null) {
			throw new IllegalStateException("config.properties not found on the classpath");
		}
		try {
			prop.load(in);
		} catch (IOException e) {
			// Keep the original exception as the cause so the real reason is not lost.
			throw new IllegalStateException("Init config properties error!", e);
		} finally {
			try {
				in.close();
			} catch (IOException ignored) {
				// Nothing useful can be done if closing a stream we only read from fails.
			}
		}
	}

	/**
	 * Looks up a configuration value.
	 *
	 * @param key the property name
	 * @return the configured value, or {@code null} if the key is not defined
	 */
	public static String get(String key) {
		return prop.getProperty(key);
	}

}

config.properties

ThreadNumber=3
DirName=txt
FileType=.*.txt



你可能感兴趣的:(找出多个文本中频率高的单词(3))