Java实现:读取一篇英文文章,统计其中每个单词出现的次数并排序输出

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads an English article, counts how many times each word occurs, and
 * writes the words, ordered by ascending count, to a result file.
 *
 * <p>A "word" is a maximal run of ASCII letters ({@code [a-zA-Z]+}); matching
 * is case-sensitive, so {@code The} and {@code the} are counted separately.
 *
 * <p>The listing is self-contained: save it as {@code FileWordCount.java},
 * adding a package declaration that matches your source tree
 * (e.g. {@code com.test.string}) if needed.
 *
 * @author halbert
 */
public class FileWordCount {

    /** Article read by {@link #count()}. */
    private static final String DEFAULT_INPUT_PATH = "D:\\test\\english.txt";

    /** Report written by {@link #count()}. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\test\\result.txt";

    /** Matches one word; compiled once instead of on every call. */
    private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

    /** Orders map entries by ascending occurrence count. */
    private static final Comparator<Entry<String, Integer>> BY_COUNT =
            new Comparator<Entry<String, Integer>>() {
                @Override
                public int compare(Entry<String, Integer> left, Entry<String, Integer> right) {
                    return left.getValue().compareTo(right.getValue());
                }
            };

    /**
     * Counts the words of the default article and writes the default report.
     *
     * @throws IOException if the article cannot be read or the report cannot be written
     */
    public void count() throws IOException {
        count(DEFAULT_INPUT_PATH, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Counts the words of {@code inputPath} and writes the report to {@code outputPath}.
     *
     * <p>The report holds one {@code word=count} line per distinct word, sorted by
     * ascending count (words with equal counts stay in alphabetical order), followed
     * by the total number of words and the number of distinct words. A short summary
     * is also printed to standard output before the report is written.
     *
     * <p>Both files are accessed through {@link FileReader}/{@link FileWriter}, i.e.
     * with the JVM's default charset.
     *
     * @param inputPath  path of the text file to analyse
     * @param outputPath path of the report file to create or overwrite
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public void count(String inputPath, String outputPath) throws IOException {
        // TreeMap keeps the words alphabetical; the stable sort below preserves
        // that order among words that share the same count.
        Map<String, Integer> frequencies = new TreeMap<String, Integer>();
        int totalWords = 0;

        // Tokenise line by line: readLine() drops the line terminator, so gluing
        // the lines together would fuse the last word of one line with the first
        // word of the next one.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputPath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                totalWords += tally(line, frequencies);
            }
        }

        List<Entry<String, Integer>> ranked =
                new ArrayList<Entry<String, Integer>>(frequencies.entrySet());
        Collections.sort(ranked, BY_COUNT);

        System.out.println("统计分析如下:");
        System.out.println("t 文章中单词总数" + totalWords + "个");
        System.out.println("具体的信息在原文件目录的result.txt文件中");

        // try-with-resources closes (and therefore flushes) the writer even if a write fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath))) {
            for (Entry<String, Integer> entry : ranked) {
                writer.write(entry.getKey() + "=" + entry.getValue());
                writer.newLine();
            }
            writer.write("english.txt中的单词总数" + totalWords + "个");
            writer.newLine();
            writer.write("english.txt中不同单词" + frequencies.size() + "个");
        }
    }

    /**
     * Adds every word found in {@code line} to {@code frequencies}.
     *
     * @param line        one line of text, without its line terminator
     * @param frequencies running word counts, updated in place
     * @return the number of words found in {@code line}
     */
    private static int tally(String line, Map<String, Integer> frequencies) {
        int found = 0;
        Matcher matcher = WORD.matcher(line);
        while (matcher.find()) {
            String word = matcher.group();
            Integer seen = frequencies.get(word);
            frequencies.put(word, seen == null ? 1 : seen + 1);
            found++;
        }
        return found;
    }

    /**
     * Runs the analysis on the default files and prints the stack trace if an
     * I/O error occurs.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            new FileWordCount().count();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


运行结果:统计分析如下: t 文章中单词总数997个 具体的信息在原文件目录的result.txt文件中

java实现读取一篇英文文章,统计其中每个单词出现的次数并排序输出_第1张图片


你可能感兴趣的:(测试题)