数据分析案例——IP归属地分析

根据用户上网数据,完成上网IP归属地分析统计,并进行相应排序。

简介

给定的access.log是电信运营商的用户上网数据,第一个字段是时间,第二个字段是ip地址,第三个字段是访问的网站,其他字段可以忽略不计。

ip.txt是ip地址和归属地的规则数据,里面的数据是根据ip地址的十进制从低到高排序(代码中的二分查找依赖这一升序排列)。
第一个字段是网段的起始IP地址,第二个字段是网段的结束IP地址,第三个字段是网段的起始IP地址对应的十进制,第四个字段是网段的结束IP地址对应的十进制,第五个字段代表洲,第六个代表国家,第七个代表省,第八个代表城市,其他字段可以忽略不计。


需求

通过计算access.log中的用户行为数据,统计出各个省份访问量(一次请求记作一次独立的访问量), 并按照各个省份的访问量的从高到低进行排序。


思路解析

  1. 创建一个javaBean,IpBean
  2. 编写IpUtils工具类,提供把字符串形式的ip转换为长整型ip的方法
  3. 读取ip.txt文件,返回一个List
  4. 读取access.log
  • 切分出ip地址
  • Ip地址转换为长整型的ip,调用的IpUtils.strIpToLongIp()
  • 通过长整型的ip去查找对应的IpBean,调用的IpUtils.getIpBeanByLongIpNew()(二分查找;getIpBeanByLongIp()为已废弃的顺序查找版本)
  • 获取省份,放进map里面,Map<省份,次数>
  5. map排序

数据源

  • ip.txt
  • access.log

代码实现

IpBean.java

/**
 * Mutable value holder describing one IP range rule: the range boundaries in
 * both textual and numeric form, plus the location attributes attached to it.
 */
public class IpBean {
	/** First address of the range, as text. */
	private String startIp;
	/** Last address of the range, as text. */
	private String endIp;
	/** First address of the range, as a number. */
	private long startDecIp;
	/** Last address of the range, as a number. */
	private long endDecIp;
	/** Province the range is attributed to. */
	private String province;
	/** City the range is attributed to. */
	private String city;
	/** Presumably the network operator / ISP of the range (name kept as in the original). */
	private String optioner;

	/**
	 * Assigns every field of this bean in a single call.
	 *
	 * @param startIp    first address of the range, as text
	 * @param endIp      last address of the range, as text
	 * @param startDecIp first address of the range, as a number
	 * @param endDecIp   last address of the range, as a number
	 * @param province   province name
	 * @param city       city name
	 * @param optioner   value stored in the {@code optioner} field
	 */
	public void set(String startIp, String endIp, long startDecIp, long endDecIp, String province, String city,
			String optioner) {
		// Textual boundaries.
		this.startIp = startIp;
		this.endIp = endIp;
		// Numeric boundaries.
		this.startDecIp = startDecIp;
		this.endDecIp = endDecIp;
		// Location attributes.
		this.province = province;
		this.city = city;
		this.optioner = optioner;
	}

	// ---- getters ----

	/** @return first address of the range, as text */
	public String getStartIp() {
		return this.startIp;
	}

	/** @return last address of the range, as text */
	public String getEndIp() {
		return this.endIp;
	}

	/** @return first address of the range, as a number */
	public long getStartDecIp() {
		return this.startDecIp;
	}

	/** @return last address of the range, as a number */
	public long getEndDecIp() {
		return this.endDecIp;
	}

	/** @return province name */
	public String getProvince() {
		return this.province;
	}

	/** @return city name */
	public String getCity() {
		return this.city;
	}

	/** @return value of the {@code optioner} field */
	public String getOptioner() {
		return this.optioner;
	}

	// ---- setters ----

	/** @param startIp first address of the range, as text */
	public void setStartIp(String startIp) {
		this.startIp = startIp;
	}

	/** @param endIp last address of the range, as text */
	public void setEndIp(String endIp) {
		this.endIp = endIp;
	}

	/** @param startDecIp first address of the range, as a number */
	public void setStartDecIp(long startDecIp) {
		this.startDecIp = startDecIp;
	}

	/** @param endDecIp last address of the range, as a number */
	public void setEndDecIp(long endDecIp) {
		this.endDecIp = endDecIp;
	}

	/** @param province province name */
	public void setProvince(String province) {
		this.province = province;
	}

	/** @param city city name */
	public void setCity(String city) {
		this.city = city;
	}

	/** @param optioner new value of the {@code optioner} field */
	public void setOptioner(String optioner) {
		this.optioner = optioner;
	}

	/**
	 * Renders all fields as {@code IpBean [name=value, ...]} in declaration order.
	 */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder("IpBean [");
		text.append("startIp=").append(startIp);
		text.append(", endIp=").append(endIp);
		text.append(", startDecIp=").append(startDecIp);
		text.append(", endDecIp=").append(endDecIp);
		text.append(", province=").append(province);
		text.append(", city=").append(city);
		text.append(", optioner=").append(optioner);
		return text.append("]").toString();
	}
}

IpUtils.java

/**
 * Helpers for IP attribution: converting dotted-decimal IPv4 text to a number,
 * loading the range rules from {@code ip.txt}, and finding the rule that
 * contains a given numeric IP.
 */
public class IpUtils {
	/** Number of dot-separated parts a dotted-decimal IPv4 string must have. */
	private static final int OCTET_COUNT = 4;
	/** Minimum field count of a rule line; the highest index read is 9. */
	private static final int MIN_RULE_FIELDS = 10;

	/**
	 * Rules loaded once when the class is initialised, so lookups do not
	 * re-read the file. Empty when the file could not be read.
	 */
	public static List<IpBean> ipBeanList = getIpBeanList();

	/**
	 * Prints how many rules were loaded.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// System.out.println(strIpToLongIp("1.0.1.0"));

		// Use the list cached at class initialisation instead of reading the file again.
		System.out.println(ipBeanList.size());
	}

	/**
	 * Converts a dotted-decimal IPv4 string such as {@code "1.0.1.0"} to its
	 * numeric value (first octet in the most significant position).
	 *
	 * @param str the address text; may be {@code null}
	 * @return the numeric address, or {@code 0} when {@code str} is {@code null}
	 * @throws IllegalArgumentException if {@code str} does not consist of four
	 *         dot-separated integers in the range 0..255 (a non-numeric part
	 *         surfaces as {@link NumberFormatException}, a subclass)
	 */
	public static long strIpToLongIp(String str) {
		if (str == null) {
			return 0L;
		}
		String[] parts = str.split("\\.");
		if (parts.length != OCTET_COUNT) {
			throw new IllegalArgumentException("Malformed IPv4 address: " + str);
		}
		long newIp = 0L;
		for (String part : parts) {
			long octet = Long.parseLong(part);
			// An out-of-range octet would spill into its neighbour's bits and yield a wrong address.
			if (octet < 0 || octet > 255) {
				throw new IllegalArgumentException("IPv4 octet out of range in: " + str);
			}
			newIp = (newIp << 8) | octet;
		}
		return newIp;
	}

	/**
	 * Reads {@code D:\data\ip.txt} and builds one {@link IpBean} per well-formed
	 * line, in file order. Lines that have too few fields or non-numeric range
	 * boundaries are skipped (and counted in a warning on standard error) so one
	 * bad line does not truncate the rest of the list.
	 *
	 * @return the loaded rules; empty or partial if reading the file failed
	 */
	public static List<IpBean> getIpBeanList() {
		List<IpBean> list = new ArrayList<>();
		int skipped = 0;
		try (BufferedReader br = new BufferedReader(new FileReader("D:\\data\\ip.txt"))) {
			String line;
			while ((line = br.readLine()) != null) {
				IpBean bean = parseRuleLine(line);
				if (bean == null) {
					skipped++;
				} else {
					list.add(bean);
				}
			}
		} catch (Exception e) {
			// This method runs during class initialisation, so it must not throw;
			// report the problem and return whatever was loaded so far.
			e.printStackTrace();
		}
		if (skipped > 0) {
			System.err.println("ip.txt: skipped " + skipped + " malformed line(s)");
		}
		return list;
	}

	/**
	 * Parses one '|'-separated rule line. Fields used: 0 and 1 (range start/end
	 * as text), 2 and 3 (range start/end as numbers), 6 (province), 7 (city)
	 * and 9 (stored in {@code optioner}).
	 *
	 * @param line one line of the rule file
	 * @return the parsed bean, or {@code null} when the line is malformed
	 */
	private static IpBean parseRuleLine(String line) {
		// Limit -1 keeps trailing empty fields, so a line whose last used field is empty still parses.
		String[] fields = line.split("\\|", -1);
		if (fields.length < MIN_RULE_FIELDS) {
			return null;
		}
		long startDecIp;
		long endDecIp;
		try {
			startDecIp = Long.parseLong(fields[2]);
			endDecIp = Long.parseLong(fields[3]);
		} catch (NumberFormatException e) {
			return null;
		}
		IpBean bean = new IpBean();
		bean.set(fields[0], fields[1], startDecIp, endDecIp, fields[6], fields[7], fields[9]);
		return bean;
	}

	/**
	 * Finds the rule whose range contains {@code longIp} by scanning
	 * {@link #ipBeanList} from the beginning.
	 *
	 * @param longIp numeric IP to look up
	 * @return the first rule containing {@code longIp}, or {@code null} if none does
	 * @deprecated linear scan; use {@link #getIpBeanByLongIpNew(long)}, which
	 *             performs a binary search.
	 */
	@Deprecated
	public static IpBean getIpBeanByLongIp(long longIp) {
		for (IpBean ipBean : ipBeanList) {
			if (longIp >= ipBean.getStartDecIp() && longIp <= ipBean.getEndDecIp()) {
				return ipBean;
			}
		}
		return null;
	}

	/**
	 * Finds the rule whose range contains {@code longIp} using binary search
	 * over {@link #ipBeanList}. The search moves left when {@code longIp} is
	 * below a rule's start and right when it is above the rule's end, so it
	 * assumes the list is ordered by ascending range and ranges do not overlap.
	 *
	 * @param longIp numeric IP to look up
	 * @return the rule containing {@code longIp}, or {@code null} if none does
	 */
	public static IpBean getIpBeanByLongIpNew(long longIp) {
		int low = 0;
		int high = ipBeanList.size() - 1;
		while (low <= high) {
			// Unsigned shift gives the midpoint without int overflow.
			int mid = (low + high) >>> 1;
			IpBean candidate = ipBeanList.get(mid);
			if (longIp < candidate.getStartDecIp()) {
				high = mid - 1;
			} else if (longIp > candidate.getEndDecIp()) {
				low = mid + 1;
			} else {
				// start <= longIp <= end: this is the containing range.
				return candidate;
			}
		}
		return null;
	}

}

TestMain.java

/**
 * Counts requests per province from {@code D:/data/access.log} and prints the
 * provinces ordered by request count, highest first.
 */
public class TestMain {
	/**
	 * Reads the access log line by line, resolves the IP in the second
	 * '|'-separated field to a province through {@code IpUtils}, tallies one
	 * hit per line, then prints each {@code province=count} entry to standard
	 * output in descending order of count.
	 *
	 * <p>Lines without an IP field, and IPs that no rule covers, are skipped and
	 * reported as a total on standard error instead of aborting the run. An
	 * exception thrown while converting a malformed IP is not handled per line;
	 * it ends the run through the catch block below.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// province -> number of requests
		Map<String, Integer> map = new HashMap<>();
		int skipped = 0;
		try (BufferedReader br = new BufferedReader(new FileReader("D:/data/access.log"))) {
			String line;
			while ((line = br.readLine()) != null) {
				String[] split = line.split("\\|");
				// A blank or truncated line has no IP field at index 1.
				if (split.length < 2) {
					skipped++;
					continue;
				}
				// Text IP -> numeric IP.
				long longIp = IpUtils.strIpToLongIp(split[1]);
				// Numeric IP -> rule that contains it.
				IpBean bean = IpUtils.getIpBeanByLongIpNew(longIp);
				// The lookup returns null when no rule covers the IP.
				if (bean == null) {
					skipped++;
					continue;
				}
				map.merge(bean.getProvince(), 1, Integer::sum);
			}

			// Sort the entries by count, highest first.
			List<Entry<String, Integer>> list = new ArrayList<>(map.entrySet());
			list.sort((left, right) -> Integer.compare(right.getValue(), left.getValue()));

			// Print the ranking to standard output.
			for (Entry<String, Integer> entry : list) {
				System.out.println(entry);
			}
			if (skipped > 0) {
				System.err.println("access.log: skipped " + skipped + " line(s) with no resolvable IP");
			}

		} catch (Exception e) {
			e.printStackTrace();
		}

	}

}

你可能感兴趣的:(java)