import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileFilter; import java.io.FileOutputStream; import java.io.FileReader; import java.io.RandomAccessFile; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Set; public class IpStats { private String saveDir; public final static String CRLF = "\r\n"; /** * 读取IP * * @param fileName */ public void readIp(String fileName) { try { BufferedReader reader = new BufferedReader(new FileReader(fileName)); HashMap<String, String> mapListHashMap = new HashMap<String, String>(); int lineSize=0; //RandomAccessFile rndAccessFile=new RandomAccessFile(new File(fileName),"r" ); while (true) { String line = reader.readLine(); if (line == null) { break; } long ipCounts = getIpCount(line); if (ipCounts != -1) { long rnd = ipCounts % 1024; String toFileName = rnd + ".dat"; ///System.out.println(" parpare write to file " +toFileName ); String strContent = mapListHashMap.get(toFileName); if (strContent != null) { strContent += line + CRLF; mapListHashMap.put(toFileName, strContent); } else { mapListHashMap.put(toFileName, line + CRLF); } lineSize++; } if (lineSize >= 10000) { Set<String> sets = mapListHashMap.keySet(); for (String sFileName : sets) { write2IpFile(sFileName, mapListHashMap.get(sFileName)); //System.out.println("write to file " +sFileName); } System.out.println("write to file length " + lineSize ); lineSize=0; mapListHashMap.clear(); } } reader.close(); Set<String> sets = mapListHashMap.keySet(); for (String sFileName : sets) { write2IpFile(sFileName, mapListHashMap.get(sFileName)); System.out.println("write to file " +sFileName); } mapListHashMap.clear(); System.out.println("read ip complete "); } catch (Exception e) { e.printStackTrace(); } } /** * * @param ipCountMap */ private List<ListItem> sort(HashMap<String, Long> ipCountMap) { Set<String> sets = ipCountMap.keySet(); List<ListItem> ipList = new ArrayList<ListItem>(); for (String ip : sets) { ListItem item = new ListItem(); item.setIp(ip); item.setIpCounts(ipCountMap.get(ip)); ipList.add(item); } System.out.println("sort ..." + ipCountMap); Collections.sort(ipList, new Comparator<ListItem>() { @Override public int compare(ListItem o2, ListItem o1) { if (o1.getIpCounts() - o2.getIpCounts() >= 1) { return 1; } else if (o1.getIpCounts() - o2.getIpCounts() == 0) { return 0; } return -1; } }); return ipList; } /* * read top 10 ip */ private void readTop10(File file) { try { int index = 0; BufferedReader reader = new BufferedReader(new FileReader(file)); while (true) { String line = reader.readLine(); if (line == null) { break; } if (index > 10) { break; } index++; System.out.println(line); } reader.close(); file.renameTo(new File(saveDir+"result.txt")); } catch (Exception e) { e.printStackTrace(); } } /* * start Stat SimpleFile */ public void startWork(String dir) { File[] files = new File(dir).listFiles(new FileFilter() { @Override public boolean accept(File pathname) { return pathname.getName().endsWith(".dat"); } }); for (File file : files) { System.out.println("stat file ... " + file.getAbsoluteFile()); statFile(file.getAbsolutePath()); System.out.println("stat file ... " + file.getAbsoluteFile()+ " end ---"); } File lastFile; while (true) { File[] curFiles = new File(dir).listFiles(new FileFilter() { @Override public boolean accept(File pathname) { return pathname.getName().endsWith(".cout"); } }); if (curFiles.length == 1) { lastFile = curFiles[0]; break; } else { System.out.println("mereFile file ... " + curFiles[0].getAbsolutePath()); Merge.mereFile(saveDir, curFiles[0].getAbsolutePath(), curFiles[1].getAbsolutePath()); System.out.println("mereFile end -- " + curFiles[0].getAbsolutePath()); } } System.out.println("stat file complete "); readTop10(lastFile); } /* * stat file */ private void statFile(String fileName) { HashMap<String, Long> ipCountMap = new HashMap<String, Long>(); try { File curFile = new File(fileName); BufferedReader reader = new BufferedReader(new FileReader(curFile)); while (true) { String line = reader.readLine(); if (line == null) { break; } Long counts = ipCountMap.get(line); if (counts == null) { ipCountMap.put(line, Long.parseLong("1")); } else { long couts = counts.longValue() + 1; ipCountMap.put(line, Long.parseLong(couts + "")); } } reader.close(); Set<String> sets = ipCountMap.keySet(); String sName = new File(fileName).getName(); int index = sName.indexOf("."); if (index != -1) { sName = sName.substring(0, index); } sName = sName + "_counts.cout"; List<ListItem> ipList = sort(ipCountMap); for (ListItem ipItem : ipList) { write2IpFile(sName, ipItem.getIp() + "\t" + ipItem.getIpCounts() + CRLF); } //curFile.renameTo(new File(curFile.getAbsolutePath() + "-old")); curFile.delete(); } catch (Exception e) { e.printStackTrace(); } } /* * write to ip file */ private void write2IpFile(String fileName, String ipContent) { try { String filePath = saveDir + fileName; BufferedOutputStream fout = new BufferedOutputStream( new FileOutputStream(new File(filePath), true)); fout.write(ipContent.getBytes()); fout.close(); } catch (Exception e) { e.printStackTrace(); } } /** * get ipcounts * * @param ip * @return */ private long getIpCount(String ip) { long ipcounts = 0; String[] ipArr = ip.split("\\."); if (ipArr.length != 4) { return -1; } ipcounts = 256 * 256 * 256 * Integer.parseInt(ipArr[0]) + 256 * 256 * Integer.parseInt(ipArr[1]) + 256 * Integer.parseInt(ipArr[2]) + Integer.parseInt(ipArr[3]); return ipcounts; } /* * * [email protected] tomjnefu 2013-07-13 * * @param args */ public static void main(String[] args) { IpStats stat = new IpStats(); /* * 测试文件夹 d:/bigfiles/ big.txt 为 2G的大文件 */ String saveDir = "d:/bigfiles/"; stat.setSaveDir(saveDir); stat.readIp("d:/bigfiles/big.txt"); stat.startWork(saveDir); } public void setSaveDir(String saveDir) { this.saveDir = saveDir; } public String getSaveDir() { return saveDir; } }