使用jsoup模拟火狐浏览器进行登录抓取页面数据

使用jsoup模拟火狐浏览器进行登录抓取页面数据

import com.qyvip.libra.jfinal.base.BaseController;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class StudentMessageController extends BaseController {
    /** URL of the login page; it is fetched first to obtain the login form and the initial cookies. */
    public static String LOGIN_URL = "http://ahhnjy.263.net/eschool/login";
    /** Name of the HTTP request header used to identify the client. */
    public static String USER_AGENT = "User-Agent";
    /** User-Agent string of Firefox 52 on Windows, sent so the requests look like they come from a browser. */
    public static String USER_AGENT_VALUE = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0";
    /**
     * Entry point: runs the simulated login with placeholder credentials.
     *
     * <p>Replace the two placeholder strings with a real user name and password
     * for the site at {@code LOGIN_URL} before running.
     *
     * @param args command-line arguments (unused)
     * @throws Exception propagated unchanged from {@code simulateLogin}
     */
    public static void main(String[] args) throws Exception {
        final String userName = "此处填入用户名"; // placeholder: user name
        final String pwd = "此处填入密码";        // placeholder: password
        simulateLogin(userName, pwd);
    }
    public static void simulateLogin(String userName, String pwd) throws Exception {
        Connection con = Jsoup.connect(LOGIN_URL);  // 获取connection
        con.header(USER_AGENT, USER_AGENT_VALUE);   // 配置模拟浏览器
        Connection.Response rs = con.execute();                // 获取响应
        Document d1 = Jsoup.parse(rs.body());       // 转换为Dom树
        List eleList = d1.select("#loginForm");  // 获取提交form表单,可以通过查看页面源码代码得知

        Map datas = new HashMap<>();
        for (Element e : eleList.get(0).getAllElements()) {
            // 设置用户名
            if (e.attr("name").equals("username")) {
                e.attr("value", userName);
            }
            if (e.attr("name").equals("password")) {
                e.attr("value", pwd);
            }
            if (e.attr("name").length() > 0) {
                datas.put(e.attr("name"), e.attr("value"));
            }
        }

        Connection con2 = Jsoup.connect("http://ahhnjy.263.net/eschool/loginCheck");
        con2.header(USER_AGENT, USER_AGENT_VALUE);
        Connection.Response login = con2.ignoreContentType(true).followRedirects(true).method(Connection.Method.POST)
                .data(datas).cookies(rs.cookies()).execute();

        Document doc = Jsoup.connect("http://ahhnjy.263.net/eschool/admin/user?isFirst=1").cookies(login.cookies())
                .header("Accept", "*/*")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3")
                .header("Referer", "https://www.baidu.com/")
                .header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0")
                .timeout(5000)
                .get();

        List str=new ArrayList();
        Elements trs = doc.select("table").select("tr");
        for(int i = 0;i

需要导入的maven依赖


        
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.11.3</version>
        </dependency>

你可能感兴趣的:(使用jsoup模拟火狐浏览器进行登录抓取页面数据)