使用 Jsoup 发送 GET / POST 请求爬取数据

1 依赖


  
<dependency>
  <groupId>org.jsoup</groupId>
  <artifactId>jsoup</artifactId>
  <version>1.13.1</version>
</dependency>

2 Get 请求

2.1 HTML

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

...

/**
 * Fetches an HTML page with a GET request, extracts the text of the elements
 * carrying a given CSS class, and converts that text to JSON.
 *
 * @param paramUrl URL of the page to fetch
 * @return the value produced by {@code changeJson} for the extracted text, or
 *         {@code null} if any exception was thrown along the way (the stack
 *         trace is printed)
 */
public JSONObject doGet(String paramUrl) {
    try {
        // Configure the request first, then execute it as a separate step.
        final Connection request = Jsoup.connect(paramUrl)
                .ignoreContentType(true)
                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1295.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.5 WindowsWechat")
                .timeout(10000);
        final Document page = request.get();

        // NOTE(review): "标签名" looks like a placeholder; presumably it should be
        // replaced with the CSS class name of the elements to extract.
        final String extractedText = page.getElementsByClass("标签名").text();

        // Convert the extracted text to JSON (changeJson is defined outside this snippet).
        return changeJson(extractedText);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

2.2 JSON

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

...

/**
 * Sends a GET request and parses the response body as a JSON object.
 *
 * @param url URL to request
 * @return the parsed response body when the status code is 200; {@code null}
 *         for any other status code or if an exception was thrown (the stack
 *         trace is printed)
 */
public JSONObject doGet(String url) {
    try {
        final Response reply = Jsoup.connect(url)
                .ignoreContentType(true)
                .header("Content-type", "application/x-www-form-urlencoded; charset=UTF-8")
                // IpUtils is the author's own utility class (not shown here).
                .header("x-forwarded-for", IpUtils.getRandomIp())
                .method(Method.GET)
                .timeout(10000)
                .execute();

        // Anything other than 200 yields no result.
        if (reply.statusCode() != 200) {
            return null;
        }
        return JSONObject.parseObject(reply.body());
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

3 Post 请求

3.1 application/x-www-form-urlencoded

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

...

/**
 * Sends a form-encoded ({@code application/x-www-form-urlencoded}) POST request
 * and parses the response body as a JSON object.
 *
 * @param param form fields to submit; passed unchanged to the connection's
 *              {@code data(...)} call
 * @param url   URL to post to
 * @return the parsed response body when the status code is 200; {@code null}
 *         for any other status code or if an exception was thrown (the stack
 *         trace is printed)
 */
public JSONObject doPost(Map param, String url) {
    try {
        final Response reply = Jsoup.connect(url)
                .ignoreContentType(true)
                .header("Content-type", "application/x-www-form-urlencoded")
                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
                // IpUtils is the author's own utility class (not shown here).
                .header("x-forwarded-for", IpUtils.getRandomIp())
                .method(Method.POST)
                .data(param)
                .timeout(25000)
                .execute();

        // Anything other than 200 yields no result.
        if (reply.statusCode() != 200) {
            return null;
        }
        return JSONObject.parseObject(reply.body());
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

3.2 text/plain

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

...

/**
 * Sends a {@code text/plain} POST request whose body is the given string and
 * parses the response body as a JSON object.
 *
 * @param param request body, formatted as {@code key1=value1&key2=value2}
 * @param url   URL to post to
 * @return the parsed response body when the status code is 200; {@code null}
 *         for any other status code or if an exception was thrown (the stack
 *         trace is printed)
 */
public JSONObject doPost(String param, String url) {
    try {
        final Response reply = Jsoup.connect(url)
                .ignoreContentType(true)
                .header("Content-type", "text/plain")
                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
                // IpUtils is the author's own utility class (not shown here).
                .header("x-forwarded-for", IpUtils.getRandomIp())
                .method(Method.POST)
                .requestBody(param)
                .timeout(15000)
                .execute();

        // Anything other than 200 yields no result.
        if (reply.statusCode() != 200) {
            return null;
        }
        return JSONObject.parseObject(reply.body());
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

3.3 application/json

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

...

public JSONObject doPost(String paramJsonStr, Proxy proxy, url) {
        try {
            Response response = Jsoup.connect(url)
                    				 .ignoreContentType(true)
                    				 .header("Content-Type", "application/json;charset=UTF-8")
                    				 .header("User-Agent", "Mozilla/5.0 (Linux; Android 5.1.1; sm-j200g Build/LMY48Z) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/39.0.0.0 Mobile Safari/537.36")
                   					 .requestBody(paramJsonStr)
                  					 .proxy(proxy)  // 代理 IP
                    				 .method(Method.POST)
                    				 .timeout(10000)
                    				 .execute();
            if (response.statusCode() == 200) {
                String bodyStr = response.body();
                return JSONObject.parseObject(bodyStr);
            }
        }
        catch (Exception e) {
			e.printStackTrace();
        }
        return null;
}

你可能感兴趣的:(常见问题)