使用Java处理日志文件,完成需求。
http.log日志文件中,是电信运营商记录用户上网访问某些网站行为的日志记录数据,一条数据中有多个字段用空格或制表符分隔。
例如:"18611132889 http://v.baidu.com/tv 20 5000"是一条上网行为,第一个字段代表手机号码,第二个字段代表请求网站的URL,
第三个字段代表请求发送的数据即上行流量(20字节),第四个字段代表服务器响应给用户的流量即下行流量(5000字节)。
phone.txt是手机号段规则文件,记录手机号码号段(前缀)对应的地区城市和运营商。
package com.traffic;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map.Entry;
/**
 * Sorting helpers shared by the traffic reports.
 */
public class SortUtils {
	/**
	 * Sorts the given entries in place by their value, largest value first.
	 *
	 * <p>Entries with equal values compare as equal, so they keep their relative
	 * order from the input list.
	 *
	 * @param list entries to sort; the list itself is reordered
	 */
	public static void sortBySum(List<Entry<String, Long>> list) {
		Collections.sort(list, new Comparator<Entry<String, Long>>() {
			@Override
			public int compare(Entry<String, Long> o1, Entry<String, Long> o2) {
				// Long.compare returns 0 for ties (required by the Comparator
				// contract) and cannot overflow the way a subtraction can.
				// Arguments are swapped to get descending order.
				return Long.compare(o2.getValue(), o1.getValue());
			}
		});
	}
}
需求1代码实现
package com.traffic;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Requirement 1: aggregates the traffic (upstream + downstream bytes) per site
 * host found in the access log and reports the sites with the most traffic.
 */
public class TestMain1 {
	/** Access log that is aggregated. */
	private static final String LOG_PATH = "D:\\data\\http.log";
	/** File that receives the top-ranked entries, one per line. */
	private static final String OUTPUT_PATH = "D:\\data\\1";
	/** Maximum number of top-ranked sites that are reported. */
	private static final int TOP_N = 3;
	/** Extracts the host part of a URL; compiled once instead of once per log line. */
	private static final Pattern HOST_PATTERN = Pattern.compile("(\\w+\\.)?(\\w+\\.){1}\\w+");

	/**
	 * Builds the per-host traffic totals, sorts them in descending order and
	 * writes the first {@link #TOP_N} entries to the output file and to stdout.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Host -> total traffic.
		Map<String, Long> map = getUrlSum();
		// Copy the entries into a list so they can be sorted.
		List<Entry<String, Long>> list = new ArrayList<>(map.entrySet());
		SortUtils.sortBySum(list);
		// Fewer than TOP_N hosts may exist (e.g. missing or tiny log file).
		int limit = Math.min(TOP_N, list.size());
		try (BufferedWriter bw = new BufferedWriter(new FileWriter(OUTPUT_PATH))) {
			for (int i = 0; i < limit; i++) {
				Entry<String, Long> entry = list.get(i);
				// Entry.toString() renders the pair as "key=value".
				bw.write(entry.toString());
				bw.newLine();
				System.out.println(entry);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads the access log and sums upstream plus downstream traffic per host.
	 *
	 * <p>Each line is parsed as {@code <phone>\t<url> <up> <down>}. Lines that do
	 * not have this shape, whose URL yields no host, or whose traffic fields are
	 * not numeric are skipped so that one bad record does not discard the rest
	 * of the file.
	 *
	 * @return map from host to total traffic; empty if the log cannot be read
	 */
	private static Map<String, Long> getUrlSum() {
		Map<String, Long> map = new HashMap<>();
		try (BufferedReader br = new BufferedReader(new FileReader(LOG_PATH))) {
			String line;
			while ((line = br.readLine()) != null) {
				String[] columns = line.split("\t");
				if (columns.length < 2) {
					continue; // blank line or no tab-separated URL column
				}
				String[] split = columns[1].split(" ");
				if (split.length < 3) {
					continue; // URL, upstream and downstream are all required
				}
				String url = getUrlByRgex(split[0]);
				if (url == null) {
					continue; // no recognizable host in the URL
				}
				long upDown;
				try {
					upDown = Long.parseLong(split[1]) + Long.parseLong(split[2]);
				} catch (NumberFormatException e) {
					System.err.println("Skipping line with non-numeric traffic: " + line);
					continue;
				}
				// Start at upDown for a new host, otherwise add to the running total.
				map.merge(url, upDown, Long::sum);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return map;
	}

	/**
	 * Extracts the first host-like substring (two or three dot-separated word
	 * groups) from the given URL.
	 *
	 * @param oldUrl the raw URL from the log
	 * @return the first match, or {@code null} if the URL contains none
	 */
	private static String getUrlByRgex(String oldUrl) {
		Matcher matcher = HOST_PATTERN.matcher(oldUrl);
		return matcher.find() ? matcher.group() : null;
	}
}
需求2代码实现
package com.traffic;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
 * Requirement 2: aggregates the traffic per province by joining the per-phone
 * totals from the access log with the phone-prefix table, then prints the
 * provinces with the most traffic.
 */
public class TestMain2 {
	/** Access log that is aggregated. */
	private static final String LOG_PATH = "D:/data/http.log";
	/** Phone-prefix table. */
	private static final String PHONE_PATH = "D:/data/phone.txt";
	/** Number of leading characters of a phone number used as lookup key. */
	private static final int PREFIX_LENGTH = 7;
	/** Maximum number of top-ranked provinces that are printed. */
	private static final int TOP_N = 3;

	/**
	 * Builds the per-province traffic totals, sorts them in descending order and
	 * prints the first {@link #TOP_N} entries to stdout.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Phone prefix -> province, loaded from the prefix table.
		Map<String, String> map1 = getNumProvince();
		// Phone number -> total traffic.
		Map<String, Long> map2 = getPNumSum();
		// Province -> total traffic.
		Map<String, Long> map3 = new HashMap<>();
		int unmatched = 0;
		for (Entry<String, Long> entry : map2.entrySet()) {
			String key = entry.getKey();
			// A shorter key cannot supply a full prefix; substring would throw.
			String province = key.length() < PREFIX_LENGTH ? null : map1.get(key.substring(0, PREFIX_LENGTH));
			if (province == null) {
				// Unknown prefix: leave it out instead of ranking a "null" province.
				unmatched++;
				continue;
			}
			map3.merge(province, entry.getValue(), Long::sum);
		}
		if (unmatched > 0) {
			System.err.println("Phone numbers without a known province: " + unmatched);
		}
		// Copy the entries into a list so they can be sorted.
		ArrayList<Entry<String, Long>> arrayList = new ArrayList<>(map3.entrySet());
		SortUtils.sortBySum(arrayList);
		// Fewer than TOP_N provinces may exist (e.g. missing input files).
		int limit = Math.min(TOP_N, arrayList.size());
		for (int i = 0; i < limit; i++) {
			System.out.println(arrayList.get(i));
		}
	}

	/**
	 * Reads the access log and sums upstream plus downstream traffic per phone
	 * number.
	 *
	 * <p>Each line is parsed as {@code <phone>\t<url> <up> <down>}. Lines that do
	 * not have this shape or whose traffic fields are not numeric are skipped so
	 * that one bad record does not discard the rest of the file.
	 *
	 * @return map from phone number to total traffic; empty if the log cannot be read
	 */
	private static Map<String, Long> getPNumSum() {
		Map<String, Long> map = new HashMap<>();
		try (BufferedReader br = new BufferedReader(new FileReader(LOG_PATH))) {
			String line;
			while ((line = br.readLine()) != null) {
				String[] split = line.split("\t");
				if (split.length < 2) {
					continue; // blank line or no tab-separated URL column
				}
				String pNum = split[0];
				String[] split2 = split[1].split(" ");
				if (split2.length < 3) {
					continue; // URL, upstream and downstream are all required
				}
				long upDown;
				try {
					upDown = Long.parseLong(split2[1]) + Long.parseLong(split2[2]);
				} catch (NumberFormatException e) {
					System.err.println("Skipping line with non-numeric traffic: " + line);
					continue;
				}
				// Start at upDown for a new number, otherwise add to the running total.
				map.merge(pNum, upDown, Long::sum);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return map;
	}

	/**
	 * Reads the phone-prefix table and maps each prefix (second tab-separated
	 * column) to its province (third tab-separated column). The first line of
	 * the file is skipped; lines with fewer than three columns are ignored.
	 *
	 * @return map from phone prefix to province; empty if the file cannot be read
	 */
	private static Map<String, String> getNumProvince() {
		Map<String, String> map = new HashMap<>();
		try (BufferedReader br = new BufferedReader(new FileReader(PHONE_PATH))) {
			String line;
			br.readLine(); // skip the first line
			while ((line = br.readLine()) != null) {
				String[] split = line.split("\t");
				if (split.length < 3) {
					continue; // prefix and province columns are both required
				}
				map.put(split[1], split[2]);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return map;
	}
}
https://github.com/wangjiaxi/traffic