Java笔试题:读取两篇英文文档,输出在两篇文档中都出现、且出现总次数最多的3个单词

         思路:用BufferedReader分别读取两篇文档,并使用20M的缓冲区来缓存文本。先将第一篇文档中所有的单词及其出现次数放入一个TreeMap;读取第二篇文档时,只统计第一篇文档的TreeMap中已有的单词,并把两篇文档中的次数相加,得到两篇共有单词的统计Map。最后将该Map的键值对放入ArrayList,按次数从高到低排序,输出排名前3的单词。

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Prints the words that occur in both of two English text files and have the
 * highest combined number of occurrences (at most three of them).
 *
 * <p>A word is a maximal run of ASCII letters and matching is case-sensitive,
 * so {@code The} and {@code the} are counted as different words. Words with
 * equal totals are listed in alphabetical order.
 */
public class CountsWord {

	/** Matches one word: a maximal run of ASCII letters. */
	private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]+");

	/** Reader buffer size in chars (20M), as called for by the exercise. */
	private static final int BUFFER_SIZE = 20 * 1024 * 1024;

	/** Maximum number of words to print. */
	private static final int TOP_COUNT = 3;

	/**
	 * Reads the two documents and prints up to {@link #TOP_COUNT} lines of the
	 * form {@code word:total}, highest total first.
	 *
	 * @param args optional paths of the first and second document; each one
	 *             falls back to {@code E:\text1.txt} / {@code E:\text2.txt}
	 *             when it is not supplied
	 * @throws Exception if either file cannot be opened or read
	 */
	public static void main(String[] args) throws Exception {
		File file1 = new File(args.length > 0 ? args[0] : "E:\\text1.txt");
		File file2 = new File(args.length > 1 ? args[1] : "E:\\text2.txt");

		// TreeMap keeps the words sorted, which is what makes ties in the final
		// ranking come out in alphabetical order.
		Map<String, Integer> firstCounts = new TreeMap<String, Integer>();
		Map<String, Integer> commonCounts = new TreeMap<String, Integer>();

		// try-with-resources closes each reader even when readLine() fails.
		try (BufferedReader reader = openReader(file1)) {
			String line;
			while ((line = reader.readLine()) != null) {
				countWords(line, firstCounts);
			}
		}
		try (BufferedReader reader = openReader(file2)) {
			String line;
			while ((line = reader.readLine()) != null) {
				countCommonWords(line, firstCounts, commonCounts);
			}
		}

		// topWords never returns more entries than exist, so documents sharing
		// fewer than TOP_COUNT words no longer cause an IndexOutOfBoundsException.
		for (Entry<String, Integer> entry : topWords(commonCounts, TOP_COUNT)) {
			System.out.println(entry.getKey() + ":" + entry.getValue());
		}
	}

	/** Opens {@code file} as UTF-8 text behind a {@link #BUFFER_SIZE}-char buffer. */
	private static BufferedReader openReader(File file) throws IOException {
		return new BufferedReader(
				new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8),
				BUFFER_SIZE);
	}

	/**
	 * Adds one to the count of every word found in {@code line}.
	 *
	 * @param line   one line of text
	 * @param counts word-to-occurrences map, updated in place
	 */
	static void countWords(String line, Map<String, Integer> counts) {
		Matcher matcher = WORD_PATTERN.matcher(line);
		while (matcher.find()) {
			String word = matcher.group();
			Integer seen = counts.get(word);
			counts.put(word, seen == null ? 1 : seen + 1);
		}
	}

	/**
	 * Counts the words of {@code line} that also occur in the first document.
	 *
	 * <p>The first time such a word is seen its total starts from its count in
	 * the first document, so every value in {@code commonCounts} is the sum of
	 * the occurrences in both documents.
	 *
	 * @param line         one line of the second document
	 * @param firstCounts  word counts of the first document (read only)
	 * @param commonCounts combined totals of the shared words, updated in place
	 */
	static void countCommonWords(String line, Map<String, Integer> firstCounts,
			Map<String, Integer> commonCounts) {
		Matcher matcher = WORD_PATTERN.matcher(line);
		while (matcher.find()) {
			String word = matcher.group();
			Integer inFirst = firstCounts.get(word);
			if (inFirst == null) {
				// Only words present in the first document are of interest.
				continue;
			}
			Integer total = commonCounts.get(word);
			commonCounts.put(word, (total == null ? inFirst : total) + 1);
		}
	}

	/**
	 * Returns the entries of {@code counts} with the highest values.
	 *
	 * @param counts word-to-total map
	 * @param limit  maximum number of entries to return
	 * @return at most {@code limit} entries, highest value first; entries with
	 *         equal values keep the iteration order of {@code counts}
	 */
	static List<Entry<String, Integer>> topWords(Map<String, Integer> counts, int limit) {
		// The entry set is copied into a list because Collections.sort needs a List.
		List<Entry<String, Integer>> entries =
				new ArrayList<Entry<String, Integer>>(counts.entrySet());
		// Collections.sort is stable, so equal totals stay in map order.
		Collections.sort(entries, new Comparator<Entry<String, Integer>>() {
			@Override
			public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
				return o2.getValue().compareTo(o1.getValue());
			}
		});
		int size = Math.max(0, Math.min(limit, entries.size()));
		return new ArrayList<Entry<String, Integer>>(entries.subList(0, size));
	}
}

你可能感兴趣的:(Java)