JAVA爬虫练习~通过杭电oj账号查询做题数

1. 首先我们来看一下效果:

JAVA爬虫练习~通过杭电oj账号查询做题数_第1张图片

 

JAVA爬虫练习~通过杭电oj账号查询做题数_第2张图片

通过查询我们得到了 AC 做题数:先爬取用户页面的 HTML 代码,再通过自己的逻辑筛选出所需要的信息。

接下来是代码:


package day_1;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Scanner;

/**
 * Console tool that looks up an HDU Online Judge account and prints the
 * nickname, school and number of solved problems scraped from the account's
 * public status page.
 *
 * <p>The page is read line by line and each field is cut out of the line that
 * carries its marker text; no HTML parser is involved, so the extraction
 * depends on the page keeping each field on a single line.
 */
public class test {
	/**
	 * Result flag of the most recent {@link #getHTMLSrc(String)} call: stays 0
	 * when no nickname line was seen and is raised by 2 for every nickname
	 * line found. {@link #main(String[])} resets it around each lookup.
	 */
	static int asd = 0;

	/** Status page address; the account name is appended as the {@code user} value. */
	private static final String PROFILE_URL = "http://acm.hdu.edu.cn/userstatus.php?user=";

	/**
	 * Text that identifies the line holding the nickname.
	 *
	 * <p>NOTE(review): the literal originally used here was lost when the
	 * article's HTML was rendered; {@code <h1} is a reconstruction that assumes
	 * the nickname is the page's heading element - confirm against the live page.
	 */
	static final String NICKNAME_MARKER = "<h1";

	/** Text that identifies the line holding the school. */
	private static final String SCHOOL_MARKER = "from:";

	/** Text that identifies the line holding the solved-problem count. */
	private static final String SOLVED_MARKER = "Problems Solved";

	/**
	 * The solved count is the text that follows this many {@code '>'}
	 * characters on its line (presumably a row shaped like
	 * {@code <tr><td>label</td><td ...>value</td></tr>} - verify against the page).
	 */
	private static final int SOLVED_VALUE_TAG_INDEX = 4;

	/**
	 * Prompts for account names on standard input and prints the scraped
	 * information for each one until the input is exhausted.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try (Scanner sc = new Scanner(System.in)) {
			System.out.println("############");
			System.out.println("欢迎查询杭电oj个人信息,请输入账号");
			System.out.print(":");
			// hasNext() ends the loop cleanly at end of input instead of
			// letting next() throw NoSuchElementException.
			while (sc.hasNext()) {
				String account = sc.next();
				System.out.println("....等候");
				System.out.println("~~~~~~~~~~~~~~~~~~~~");
				asd = 0;
				getHTMLSrc(PROFILE_URL + account);
				if (asd == 0) {
					System.out.println("查找的账号不存在");
				}
				asd = 0;
				System.out.println("~~~~~~~~~~~~~~~~~~~~");
				System.out.println();
				System.out.println("可重复输入账号");
				System.out.print(":");
			}
		}
	}

	/**
	 * Downloads {@code url}, decodes it as GBK and prints every profile field
	 * found in it. {@link #asd} is raised when a nickname line is seen.
	 *
	 * <p>I/O failures are reported with a stack trace and not rethrown, so the
	 * caller only observes them as {@link #asd} staying unchanged.
	 *
	 * @param url address of the status page to read
	 */
	public static void getHTMLSrc(String url) {
		// Both resources are closed automatically, in reverse order, even when
		// constructing the reader or reading a line fails.
		try (InputStream in = new URL(url).openStream();
				BufferedReader reader = new BufferedReader(new InputStreamReader(in, "GBK"))) {
			printProfile(reader);
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Scans the page line by line and prints each recognised field.
	 * Package-private so the parsing can be exercised without network access.
	 *
	 * @param reader source of the page text
	 * @throws IOException if reading a line fails
	 */
	static void printProfile(BufferedReader reader) throws IOException {
		String line;
		while ((line = reader.readLine()) != null) {
			if (line.contains(NICKNAME_MARKER)) {
				asd += 2;
				System.out.print("用户昵称:");
				System.out.println(extractNickname(line));
			}
			if (line.contains(SCHOOL_MARKER)) {
				// No line break here: the solved-count branch emits it.
				System.out.print("学校:");
				System.out.print(extractSchool(line));
			}
			if (line.contains(SOLVED_MARKER)) {
				System.out.println();
				System.out.print("AC题目数量:");
				System.out.println(extractSolvedCount(line));
			}
		}
	}

	/**
	 * Returns the text enclosed by the tag that starts at {@link #NICKNAME_MARKER}.
	 *
	 * @param line one line of the page
	 * @return the text between the tag's closing {@code '>'} and the next
	 *         {@code '<'}, or an empty string when the marker is absent
	 */
	static String extractNickname(String line) {
		int tagStart = line.indexOf(NICKNAME_MARKER);
		if (tagStart < 0) {
			return "";
		}
		int tagEnd = line.indexOf('>', tagStart);
		return tagEnd < 0 ? "" : textUntil(line, tagEnd + 1, "<");
	}

	/**
	 * Returns the text that follows {@code "from:"}, up to the next entity or tag.
	 *
	 * <p>Anchoring on the marker itself (rather than on the last colon of the
	 * line) keeps school names that contain a colon intact.
	 *
	 * @param line one line of the page
	 * @return the trimmed school text, or an empty string when the marker is absent
	 */
	static String extractSchool(String line) {
		int marker = line.indexOf(SCHOOL_MARKER);
		if (marker < 0) {
			return "";
		}
		return textUntil(line, marker + SCHOOL_MARKER.length(), "&<").trim();
	}

	/**
	 * Returns the text that follows the fourth {@code '>'} of the line, up to
	 * the next {@code '<'}.
	 *
	 * @param line one line of the page
	 * @return the cell text, or an empty string when the line has fewer than
	 *         four {@code '>'} characters
	 */
	static String extractSolvedCount(String line) {
		int closed = 0;
		for (int i = 0; i < line.length(); i++) {
			if (line.charAt(i) == '>' && ++closed == SOLVED_VALUE_TAG_INDEX) {
				return textUntil(line, i + 1, "<");
			}
		}
		return "";
	}

	/**
	 * Returns the part of {@code text} that starts at {@code from} and ends
	 * before the first character contained in {@code stops}, or at the end of
	 * the text when none occurs.
	 */
	private static String textUntil(String text, int from, String stops) {
		if (from >= text.length()) {
			return "";
		}
		int end = from;
		while (end < text.length() && stops.indexOf(text.charAt(end)) < 0) {
			end++;
		}
		return text.substring(from, end);
	}
}

 

 

 

 

 

 

 

 

你可能感兴趣的:(JAVA爬虫练习~通过杭电oj账号查询做题数)